From e8d1287043e8257bce3b8fe817f1856f3d87b390 Mon Sep 17 00:00:00 2001 From: Karel Vesely Date: Fri, 18 Jan 2019 02:42:16 +0100 Subject: [PATCH 001/235] [src] Fix 'sausage-time' issue which occurs with disabled MBR decoding. (#2996) --- src/lat/sausages.cc | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/lat/sausages.cc b/src/lat/sausages.cc index e376272510c..b851bc3604c 100644 --- a/src/lat/sausages.cc +++ b/src/lat/sausages.cc @@ -53,7 +53,17 @@ void MinimumBayesRisk::MbrDecode() { } // build the outputs (time, confidences), if (R_[q] != 0 || opts_.print_silence) { - one_best_times_.push_back(times_[q][0]); + // see which 'item' from the sausage-bin should we select, + // (not necessarily the 1st one when MBR decoding disabled) + int32 s = 0; + for (int32 j=0; j 1 && one_best_times_[i-2].second > one_best_times_[i-1].first) { // It's quite possible for this to happen, but it seems like it would @@ -76,8 +86,12 @@ void MinimumBayesRisk::MbrDecode() { one_best_times_[i-1].second = right; } BaseFloat confidence = 0.0; - for (int32 j = 0; j < gamma_[q].size(); j++) - if (gamma_[q][j].first == R_[q]) confidence = gamma_[q][j].second; + for (int32 j = 0; j < gamma_[q].size(); j++) { + if (gamma_[q][j].first == R_[q]) { + confidence = gamma_[q][j].second; + break; + } + } one_best_confidences_.push_back(confidence); } } From 99dc4d8857f33165e5a06cbf4e9c9d76e4e41e10 Mon Sep 17 00:00:00 2001 From: Ashish Arora Date: Mon, 21 Jan 2019 13:53:42 -0500 Subject: [PATCH 002/235] [egs] Add scripts for yomdle Russian (OCR task) (#2953) --- egs/iam/v1/RESULTS | 42 ++++ egs/yomdle_russian/README.txt | 3 + egs/yomdle_russian/v1/cmd.sh | 12 ++ egs/yomdle_russian/v1/image | 1 + .../v1/local/chain/compare_wer.sh | 66 ++++++ .../v1/local/chain/run_cnn_e2eali.sh | 1 + .../v1/local/chain/run_e2e_cnn.sh | 129 +++++++++++ .../local/chain/tuning/run_cnn_e2eali_1a.sh | 203 ++++++++++++++++++ egs/yomdle_russian/v1/local/check_tools.sh | 43 ++++ .../v1/local/extract_features.sh | 48 +++++ egs/yomdle_russian/v1/local/prepare_dict.sh | 26 +++ .../v1/local/prepare_lexicon.py | 34 +++ egs/yomdle_russian/v1/local/process_corpus.py | 30 +++ egs/yomdle_russian/v1/local/process_data.py | 65 ++++++ egs/yomdle_russian/v1/local/score.sh | 5 + egs/yomdle_russian/v1/local/train_lm.sh | 127 +++++++++++ egs/yomdle_russian/v1/local/wer_output_filter | 17 ++ egs/yomdle_russian/v1/local/yomdle | 1 + egs/yomdle_russian/v1/path.sh | 6 + egs/yomdle_russian/v1/run_end2end.sh | 186 ++++++++++++++++ egs/yomdle_russian/v1/steps | 1 + egs/yomdle_russian/v1/utils | 1 + 22 files changed, 1047 insertions(+) create mode 100644 egs/iam/v1/RESULTS create mode 100644 egs/yomdle_russian/README.txt create mode 100755 egs/yomdle_russian/v1/cmd.sh create mode 120000 egs/yomdle_russian/v1/image create mode 100755 egs/yomdle_russian/v1/local/chain/compare_wer.sh create mode 120000 egs/yomdle_russian/v1/local/chain/run_cnn_e2eali.sh create mode 100755 egs/yomdle_russian/v1/local/chain/run_e2e_cnn.sh create mode 100755 egs/yomdle_russian/v1/local/chain/tuning/run_cnn_e2eali_1a.sh create mode 100755 egs/yomdle_russian/v1/local/check_tools.sh create mode 100755 egs/yomdle_russian/v1/local/extract_features.sh create mode 100755 egs/yomdle_russian/v1/local/prepare_dict.sh create mode 100755 egs/yomdle_russian/v1/local/prepare_lexicon.py create mode 100755 egs/yomdle_russian/v1/local/process_corpus.py create mode 100755 egs/yomdle_russian/v1/local/process_data.py create mode 100755 egs/yomdle_russian/v1/local/score.sh 
create mode 100755 egs/yomdle_russian/v1/local/train_lm.sh create mode 100755 egs/yomdle_russian/v1/local/wer_output_filter create mode 120000 egs/yomdle_russian/v1/local/yomdle create mode 100755 egs/yomdle_russian/v1/path.sh create mode 100755 egs/yomdle_russian/v1/run_end2end.sh create mode 120000 egs/yomdle_russian/v1/steps create mode 120000 egs/yomdle_russian/v1/utils diff --git a/egs/iam/v1/RESULTS b/egs/iam/v1/RESULTS new file mode 100644 index 00000000000..b25cb3cd772 --- /dev/null +++ b/egs/iam/v1/RESULTS @@ -0,0 +1,42 @@ +Run_end2end.sh (WER using lang_test, lang_unk) +flat_start: + • %WER 14.41 [ 2671 / 18542, 262 ins, 561 del, 1848 sub ] exp/chain/e2e_cnn_1a/decode_test/wer_11_1.0 + • %WER 15.21 [ 2821 / 18542, 375 ins, 500 del, 1946 sub ] exp/chain/e2e_cnn_1a/decode_test/wer_11_1.0 + +cnn_e2eali_1a: + • %WER 11.94 [ 2214 / 18542, 267 ins, 380 del, 1567 sub ] exp/chain/cnn_e2eali_1a/decode_test/wer_9_1.0 + • %WER 13.30 [ 2467 / 18542, 441 ins, 330 del, 1696 sub ] exp/chain/cnn_e2eali_1a/decode_test/wer_9_0.5 + +cnn_e2eali_1b: + • %WER 11.20 [ 2076 / 18542, 260 ins, 335 del, 1481 sub ] exp/chain/cnn_e2eali_1b/decode_test/wer_9_1.0 + • %WER 12.46 [ 2311 / 18542, 371 ins, 326 del, 1614 sub ] exp/chain/cnn_e2eali_1b/decode_test/wer_9_1.0 + +cnn_e2eali_1c: + • %WER 9.90 [ 1836 / 18542, 257 ins, 227 del, 1352 sub ] exp/chain/cnn_e2eali_1c/decode_test/wer_10_1.0 + • %WER 12.10 [ 2243 / 18542, 411 ins, 269 del, 1563 sub ] exp/chain/cnn_e2eali_1c/decode_test/wer_12_0.5 + + +Run.sh (WER using lang_test, lang_unk) +cnn_1a: + • %WER 15.18 [ 2815 / 18542, 285 ins, 509 del, 2021 sub ] exp/chain/cnn_1a/decode_test/wer_11_0.0 + • %WER 16.88 [ 3130 / 18542, 444 ins, 611 del, 2075 sub ] exp/chain/cnn_1a/decode_test/wer_11_0.0 + +cnn_chainali_1a: + • %WER 14.09 [ 2612 / 18542, 245 ins, 505 del, 1862 sub ] exp/chain/cnn_chainali_1a/decode_test/wer_13_0.0 + • %WER 15.93 [ 2954 / 18542, 454 ins, 470 del, 2030 sub ] exp/chain/cnn_chainali_1a/decode_test/wer_10_0.0 + +cnn_chainali_1b: + • %WER 13.29 [ 2465 / 18542, 221 ins, 499 del, 1745 sub ] exp/chain/cnn_chainali_1b/decode_test/wer_12_0.5 + • %WER 15.09 [ 2798 / 18542, 418 ins, 468 del, 1912 sub ] exp/chain/cnn_chainali_1b/decode_test/wer_10_0.5 + +cnn_chainali_1c: + • %WER 11.59 [ 2149 / 18542, 276 ins, 362 del, 1511 sub ] exp/chain/cnn_chainali_1c/decode_test/wer_9_0.0 + • %WER 13.75 [ 2550 / 18542, 465 ins, 368 del, 1717 sub ] exp/chain/cnn_chainali_1c/decode_test/wer_8_0.0 + +cnn_chainali_1d: + • %WER 11.07 [ 2053 / 18542, 261 ins, 311 del, 1481 sub ] exp/chain/cnn_chainali_1c/decode_test/wer_9_0.0 + • %WER 12.95 [ 2402 / 18542, 436 ins, 313 del, 1653 sub ] exp/chain/cnn_chainali_1c/decode_test/wer_8_0.0 + +cnn_chainali_1e: + • %WER 10.03 [ 1859 / 18542, 226 ins, 291 del, 1342 sub ] exp/chain/cnn_chainali_1e/decode_test/wer_11_0.5 + %WER 12.15 [ 2253 / 18542, 406 ins, 282 del, 1565 sub ] exp/chain/cnn_chainali_1e/decode_test/wer_10_0.5 diff --git a/egs/yomdle_russian/README.txt b/egs/yomdle_russian/README.txt new file mode 100644 index 00000000000..3bf4cc8cd2d --- /dev/null +++ b/egs/yomdle_russian/README.txt @@ -0,0 +1,3 @@ +This directory contains example scripts for OCR on the Yomdle and Slam datasets. +Training is done on the Yomdle dataset and testing is done on Slam. 
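Aside (illustrative only, not part of any of these patches): the RESULTS file added above reports scores in Kaldi's usual format, "%WER <wer> [ <errs> / <total>, <ins> ins, <del> del, <sub> sub ] <decode-dir>". A minimal Python sketch for pulling the error breakdown out of one such line — the helper name and the example string are only for illustration:

import re

# Matches scoring lines like the ones in egs/iam/v1/RESULTS, e.g.
#   %WER 14.41 [ 2671 / 18542, 262 ins, 561 del, 1848 sub ] exp/chain/e2e_cnn_1a/decode_test/wer_11_1.0
WER_RE = re.compile(r'%WER\s+([\d.]+)\s+\[\s*(\d+)\s*/\s*(\d+),\s*(\d+)\s+ins,'
                    r'\s*(\d+)\s+del,\s*(\d+)\s+sub\s*\]\s*(\S+)')

def parse_wer_line(line):
    """Return (wer, errs, total, ins, del, sub, decode_dir), or None for non-scoring lines."""
    m = WER_RE.search(line)
    if m is None:
        return None
    wer, errs, total, ins, dels, subs, path = m.groups()
    return float(wer), int(errs), int(total), int(ins), int(dels), int(subs), path

example = '%WER 14.41 [ 2671 / 18542, 262 ins, 561 del, 1848 sub ] exp/chain/e2e_cnn_1a/decode_test/wer_11_1.0'
wer, errs, total, ins, dels, subs, path = parse_wer_line(example)
print(path, wer, round(100.0 * errs / total, 2))  # the recomputed 14.41 matches the reported WER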
+LM rescoring is also done with extra corpus data obtained from various sources diff --git a/egs/yomdle_russian/v1/cmd.sh b/egs/yomdle_russian/v1/cmd.sh new file mode 100755 index 00000000000..3d69546dfe8 --- /dev/null +++ b/egs/yomdle_russian/v1/cmd.sh @@ -0,0 +1,12 @@ +# you can change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. +export cmd="queue.pl" diff --git a/egs/yomdle_russian/v1/image b/egs/yomdle_russian/v1/image new file mode 120000 index 00000000000..1668ee99922 --- /dev/null +++ b/egs/yomdle_russian/v1/image @@ -0,0 +1 @@ +../../cifar/v1/image/ \ No newline at end of file diff --git a/egs/yomdle_russian/v1/local/chain/compare_wer.sh b/egs/yomdle_russian/v1/local/chain/compare_wer.sh new file mode 100755 index 00000000000..80f31e0f311 --- /dev/null +++ b/egs/yomdle_russian/v1/local/chain/compare_wer.sh @@ -0,0 +1,66 @@ +#!/bin/bash + +# this script is used for comparing decoding results between systems. +# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b} + +# Copyright 2017 Chun Chieh Chang +# 2017 Ashish Arora + +if [ $# == 0 ]; then + echo "Usage: $0: [ ... ]" + echo "e.g.: $0 exp/chain/cnn{1a,1b}" + exit 1 +fi + +echo "# $0 $*" +used_epochs=false + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +echo -n "# WER " +for x in $*; do + wer=$(cat $x/decode_test/scoring_kaldi/best_wer | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# WER (rescored) " +for x in $*; do + wer=$(cat $x/decode_test_rescored/scoring_kaldi/best_wer | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# CER " +for x in $*; do + cer=$(cat $x/decode_test/scoring_kaldi/best_cer | awk '{print $2}') + printf "% 10s" $cer +done +echo + +echo -n "# CER (rescored) " +for x in $*; do + cer=$(cat $x/decode_test_rescored/scoring_kaldi/best_cer | awk '{print $2}') + printf "% 10s" $cer +done +echo + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. 
+fi + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo diff --git a/egs/yomdle_russian/v1/local/chain/run_cnn_e2eali.sh b/egs/yomdle_russian/v1/local/chain/run_cnn_e2eali.sh new file mode 120000 index 00000000000..e2545b0186e --- /dev/null +++ b/egs/yomdle_russian/v1/local/chain/run_cnn_e2eali.sh @@ -0,0 +1 @@ +tuning/run_cnn_e2eali_1a.sh \ No newline at end of file diff --git a/egs/yomdle_russian/v1/local/chain/run_e2e_cnn.sh b/egs/yomdle_russian/v1/local/chain/run_e2e_cnn.sh new file mode 100755 index 00000000000..6f5742cd34b --- /dev/null +++ b/egs/yomdle_russian/v1/local/chain/run_e2e_cnn.sh @@ -0,0 +1,129 @@ +#!/bin/bash + +# Copyright 2017 Hossein Hadian +# This script does end2end chain training (i.e. from scratch) +# local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ +# System e2e_cnn_1a +# score_basic rescoring + nomalized +# WER 16.24 11.0 +# WER (rescored) 15.63 10.5 +# CER 5.98 5.6 +# CER (rescored) 5.66 5.3 +# Final train prob 0.1376 +# Final valid prob 0.1913 +# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a +# exp/chain/e2e_cnn_1a: num-iters=27 nj=5..8 num-params=3.0M dim=40->470 combine=0.091->0.091 (over 1) logprob:train/valid[17,26,final]=(0.135,0.137,0.138/0.191,0.191,0.191) + +set -e +# configs for 'chain' +stage=0 +nj=30 +train_stage=-10 +get_egs_stage=-10 +affix=1a + +# training options +tdnn_dim=450 +minibatch_size=150=64,32/300=32,16/600=16,8/1200=8,4 +cmvn_opts="--norm-means=false --norm-vars=false" +train_set=train +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! 
cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 1 ]; then + steps/nnet3/chain/e2e/prepare_e2e.sh --nj $nj --cmd "$cmd" \ + --shared-phones true \ + --type mono \ + data/$train_set $lang $treedir + $cmd $treedir/log/make_phone_lm.log \ + cat data/$train_set/text \| \ + steps/nnet3/chain/e2e/text_to_phones.py data/lang \| \ + utils/sym2int.pl -f 2- data/lang/phones.txt \| \ + chain-est-phone-lm --num-extra-lm-states=500 \ + ark:- $treedir/phone_lm.fst +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + cnn_opts="l2-regularize=0.075" + tdnn_opts="l2-regularize=0.075" + output_opts="l2-regularize=0.1" + common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=40 name=input + conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + relu-batchnorm-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $output_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs +fi + +if [ $stage -le 3 ]; then + steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ + --cmd "$cmd" \ + --feat.cmvn-opts "$cmvn_opts" \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.apply-deriv-weights true \ + --egs.stage $get_egs_stage \ + --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ + --chain.frame-subsampling-factor 4 \ + --chain.alignment-subsampling-factor 4 \ + --trainer.add-option="--optimization.memory-compression-level=2" \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 3 \ + --trainer.optimization.momentum 0 \ + --trainer.optimization.num-jobs-initial 5 \ + --trainer.optimization.num-jobs-final 8 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.shrink-value 1.0 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir data/${train_set} \ + --tree-dir $treedir \ + --dir $dir || exit 1; +fi diff --git a/egs/yomdle_russian/v1/local/chain/tuning/run_cnn_e2eali_1a.sh 
b/egs/yomdle_russian/v1/local/chain/tuning/run_cnn_e2eali_1a.sh new file mode 100755 index 00000000000..7301db33d85 --- /dev/null +++ b/egs/yomdle_russian/v1/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -0,0 +1,203 @@ +#!/bin/bash + +# local/chain/compare_wer.sh exp/chain/cnn_e2eali_1a +# System cnn_e2eali_1a rescoring + nomalized +# WER 12.08 7.7 +# WER (rescored) 11.90 7.5 +# CER 3.60 3.4 +# CER (rescored) 3.42 3.2 +# Final train prob -0.0373 +# Final valid prob -0.0362 +# steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1a +# exp/chain/cnn_e2eali_1a: num-iters=74 nj=3..16 num-params=6.3M dim=40->848 combine=-0.039->-0.039 (over 1) xent:train/valid[48,73,final]=(-0.206,-0.153,-0.146/-0.191,-0.156,-0.151) logprob:train/valid[48,73,final]=(-0.044,-0.038,-0.037/-0.040,-0.037,-0.036) + +set -e -o pipefail +stage=0 +nj=30 +train_set=train +nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. +affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +common_egs_dir= +reporting_email= + +# chain options +train_stage=-10 +xent_regularize=0.1 +frame_subsampling_factor=4 +# training chunk-options +chunk_width=340,300,200,100 +num_leaves=1000 +# we don't need extra left/right context for TDNN systems. +tdnn_dim=550 +# training options +srand=0 +remove_egs=false +dropout_schedule='0,0@0.20,0.2@0.50,0' +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 2 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ + --acoustic-scale 1.0 \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + ${train_data_dir} data/lang $e2echain_model_dir $lat_dir + echo "" >$lat_dir/splice_opts +fi + +if [ $stage -le 3 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 1 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$cmd" $num_leaves ${train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 4 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + cnn_opts="l2-regularize=0.03 dropout-proportion=0.0" + tdnn_opts="l2-regularize=0.03" + output_opts="l2-regularize=0.04" + common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=90" + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=40 name=input + conv-relu-batchnorm-dropout-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-dropout-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-dropout-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + conv-relu-batchnorm-dropout-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + conv-relu-batchnorm-dropout-layer name=cnn7 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + relu-batchnorm-dropout-layer name=tdnn1 input=Append(-8,-4,0,4,8) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' mod?els... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 5 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$cmd" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.00005 \ + --chain.apply-deriv-weights=false \ + --chain.frame-subsampling-factor=$frame_subsampling_factor \ + --chain.alignment-subsampling-factor=1 \ + --chain.left-tolerance 3 \ + --chain.right-tolerance 3 \ + --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=900" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=16 \ + --trainer.frames-per-iter=2000000 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=16 \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=32,16 \ + --trainer.optimization.momentum=0.0 \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi diff --git a/egs/yomdle_russian/v1/local/check_tools.sh b/egs/yomdle_russian/v1/local/check_tools.sh new file mode 100755 index 00000000000..5b4d3107d3b --- /dev/null +++ b/egs/yomdle_russian/v1/local/check_tools.sh @@ -0,0 +1,43 @@ +#!/bin/bash -u + +# Copyright 2015 (c) Johns Hopkins University (Jan Trmal ) + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + +[ -f ./path.sh ] && . ./path.sh +set +e + +command -v python3 >&/dev/null \ + || { echo >&2 "python3 not found on PATH. You will have to install Python3, preferably >= 3.6"; exit 1; } + +python3 -c "import numpy" +if [ $? -ne 0 ] ; then + echo >&2 "This recipe needs numpy installed." + exit 1 +fi + +python3 -c "import scipy" +if [ $? -ne 0 ] ; then + echo >&2 "This recipe needs scipy installed." + exit 1 +fi + +python3 -c "import scipy.misc; scipy.misc.__dict__['imread']" +if [ $? -ne 0 ] ; then + echo >&2 "This recipe needs scipy-image and Pillow installed." + exit 1 +fi + + +exit 0 diff --git a/egs/yomdle_russian/v1/local/extract_features.sh b/egs/yomdle_russian/v1/local/extract_features.sh new file mode 100755 index 00000000000..3880ebad3e8 --- /dev/null +++ b/egs/yomdle_russian/v1/local/extract_features.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright 2017 Yiwen Shao +# 2018 Ashish Arora + +# Apache 2.0 +# This script runs the make features script in parallel. + +nj=4 +cmd=run.pl +feat_dim=40 +augment='no_aug' +fliplr=false +echo "$0 $@" + +. ./cmd.sh +. 
./path.sh +. ./utils/parse_options.sh || exit 1; + +data=$1 +featdir=$data/data +scp=$data/images.scp +logdir=$data/log + +mkdir -p $logdir +mkdir -p $featdir + +# make $featdir an absolute pathname +featdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $featdir ${PWD}` + +for n in $(seq $nj); do + split_scps="$split_scps $logdir/images.$n.scp" +done + +# split images.scp +utils/split_scp.pl $scp $split_scps || exit 1; + +$cmd JOB=1:$nj $logdir/extract_features.JOB.log \ + image/ocr/make_features.py $logdir/images.JOB.scp \ + --allowed_len_file_path $data/allowed_lengths.txt \ + --feat-dim $feat_dim --fliplr $fliplr --augment_type $augment \| \ + copy-feats --compress=true --compression-method=7 \ + ark:- ark,scp:$featdir/images.JOB.ark,$featdir/images.JOB.scp + +## aggregates the output scp's to get feats.scp +for n in $(seq $nj); do + cat $featdir/images.$n.scp || exit 1; +done > $data/feats.scp || exit 1 diff --git a/egs/yomdle_russian/v1/local/prepare_dict.sh b/egs/yomdle_russian/v1/local/prepare_dict.sh new file mode 100755 index 00000000000..22db5ae834d --- /dev/null +++ b/egs/yomdle_russian/v1/local/prepare_dict.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +# Copyright 2017 Hossein Hadian +# 2017 Babak Rekabdar +# 2017 Chun Chieh Chang +# 2017 Ashish Arora + +# This script prepares the dictionary. + +set -e +dir=data/local/dict +. ./utils/parse_options.sh || exit 1; + +mkdir -p $dir + +local/prepare_lexicon.py $dir + +cut -d' ' -f2- $dir/lexicon.txt | sed 's/SIL//g' | tr ' ' '\n' | sort -u | sed '/^$/d' >$dir/nonsilence_phones.txt || exit 1; + +echo ' SIL' >> $dir/lexicon.txt + +echo SIL > $dir/silence_phones.txt + +echo SIL >$dir/optional_silence.txt + +echo -n "" >$dir/extra_questions.txt diff --git a/egs/yomdle_russian/v1/local/prepare_lexicon.py b/egs/yomdle_russian/v1/local/prepare_lexicon.py new file mode 100755 index 00000000000..a68b1cb49dd --- /dev/null +++ b/egs/yomdle_russian/v1/local/prepare_lexicon.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 + +# Copyright 2017 Babak Rekabdar +# 2017 Hossein Hadian +# 2017 Chun Chieh Chang +# 2017 Ashish Arora +# Apache 2.0 + +# This script prepares lexicon for BPE. It gets the set of all words that occur in data/train/text. +# Since this lexicon is based on BPE, it replaces '|' with silence. 
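Aside (illustrative only, not part of the patch; the actual implementation follows below): the mapping described in the header turns each BPE token into a space-separated grapheme sequence, rewriting the word-boundary marker '|' as SIL and dropping any '#' characters. A tiny sketch of that transformation on one hypothetical token:

def token_to_entry(token):
    # '|' is treated as the word-boundary marker and maps to the SIL phone;
    # '#' characters are stripped out of the resulting entry.
    graphemes = ['SIL' if ch == '|' else ch for ch in token]
    return ' '.join(graphemes).replace('#', '')

print('|при', token_to_entry('|при'))  # ->  |при SIL п р и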
+ +import argparse +import os +import unicodedata +parser = argparse.ArgumentParser(description="""Creates the list of characters and words in lexicon""") +parser.add_argument('dir', type=str, help='output path') +args = parser.parse_args() + +### main ### +lex = {} +text_path = os.path.join('data', 'train', 'text') +with open(text_path, 'r', encoding='utf-8') as f: + for line in f: + line_vect = line.strip().split(' ') + for i in range(1, len(line_vect)): + characters = list(line_vect[i]) + characters = " ".join([ 'SIL' if char == '|' else char for char in characters]) + characters = list(characters) + characters = "".join([ '' if char == '#' else char for char in characters]) + lex[line_vect[i]] = characters + +with open(os.path.join(args.dir, 'lexicon.txt'), 'w', encoding='utf-8') as fp: + for key in sorted(lex): + fp.write(key + " " + lex[key] + "\n") diff --git a/egs/yomdle_russian/v1/local/process_corpus.py b/egs/yomdle_russian/v1/local/process_corpus.py new file mode 100755 index 00000000000..b39030270b7 --- /dev/null +++ b/egs/yomdle_russian/v1/local/process_corpus.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +# Copyright 2018 Ashish Arora +# Apache 2.0 +# This script reads valid phones and removes the lines in the corpus +# which have any other phone. + +import os +import sys, io + +phone_file = os.path.join('data/local/text/cleaned/phones.txt') +infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') +output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') +phone_dict = dict() +with open(phone_file, 'r', encoding='utf-8') as phone_fh: + for line in phone_fh: + line = line.strip().split()[0] + phone_dict[line] = line + +phone_dict[' '] = ' ' +corpus_text = list() +for line in infile: + text = line.strip() + skip_text = False + for phone in text: + if phone not in phone_dict.keys(): + skip_text = True + break + if not skip_text: + output.write(text+ '\n') + diff --git a/egs/yomdle_russian/v1/local/process_data.py b/egs/yomdle_russian/v1/local/process_data.py new file mode 100755 index 00000000000..d7546b0a803 --- /dev/null +++ b/egs/yomdle_russian/v1/local/process_data.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 + +# Copyright 2018 Ashish Arora +# 2018 Chun Chieh Chang + +""" This script reads the extracted Tamil OCR (yomdle and slam) database files + and creates the following files (for the data subset selected via --dataset): + text, utt2spk, images.scp. + Eg. local/process_data.py data/download/ data/local/splits/train.txt data/train + + Eg. 
text file: english_phone_books_0001_1 To sum up, then, it would appear that + utt2spk file: english_phone_books_0001_0 english_phone_books_0001 + images.scp file: english_phone_books_0001_0 \ + data/download/truth_line_image/english_phone_books_0001_0.png +""" + +import argparse +import os +import sys +import csv +import itertools +import unicodedata +import re +import string +import unicodedata +parser = argparse.ArgumentParser(description="Creates text, utt2spk, and images.scp files") +parser.add_argument('database_path', type=str, help='Path to data') +parser.add_argument('data_split', type=str, help='Path to file that contain datasplits') +parser.add_argument('out_dir', type=str, help='directory to output files') +args = parser.parse_args() + +### main ### +print("Processing '{}' data...".format(args.out_dir)) + +text_file = os.path.join(args.out_dir, 'text') +text_fh = open(text_file, 'w', encoding='utf-8') +utt2spk_file = os.path.join(args.out_dir, 'utt2spk') +utt2spk_fh = open(utt2spk_file, 'w', encoding='utf-8') +image_file = os.path.join(args.out_dir, 'images.scp') +image_fh = open(image_file, 'w', encoding='utf-8') + +with open(args.data_split) as f: + for line in f: + line = line.strip() + image_id = line + image_filename = image_id + '.png' + image_filepath = os.path.join(args.database_path, 'truth_line_image', image_filename) + if not os.path.isfile (image_filepath): + print("File does not exist {}".format(image_filepath)) + continue + line_id = int(line.split('_')[-1]) + csv_filename = '_'.join(line.split('_')[:-1]) + '.csv' + csv_filepath = os.path.join(args.database_path, 'truth_csv', csv_filename) + csv_file = open(csv_filepath, 'r', encoding='utf-8') + for row in csv.reader(csv_file): + if row[1] == image_filename: + text = row[11] + text_vect = text.split() # this is to avoid non-utf-8 spaces + text = " ".join(text_vect) + #text_normalized = unicodedata.normalize('NFD', text).replace('\n', '') + if not text: + continue + text_fh.write(image_id + ' ' + text + '\n') + utt2spk_fh.write(image_id + ' ' + '_'.join(line.split('_')[:-1]) + '\n') + image_fh.write(image_id + ' ' + image_filepath + '\n') diff --git a/egs/yomdle_russian/v1/local/score.sh b/egs/yomdle_russian/v1/local/score.sh new file mode 100755 index 00000000000..31564d25326 --- /dev/null +++ b/egs/yomdle_russian/v1/local/score.sh @@ -0,0 +1,5 @@ +#!/bin/bash + + +steps/scoring/score_kaldi_wer.sh "$@" +steps/scoring/score_kaldi_cer.sh --stage 2 "$@" diff --git a/egs/yomdle_russian/v1/local/train_lm.sh b/egs/yomdle_russian/v1/local/train_lm.sh new file mode 100755 index 00000000000..c73c42fb7dc --- /dev/null +++ b/egs/yomdle_russian/v1/local/train_lm.sh @@ -0,0 +1,127 @@ +#!/bin/bash + +# Copyright 2016 Vincent Nguyen +# 2016 Johns Hopkins University (author: Daniel Povey) +# 2017 Ashish Arora +# 2017 Hossein Hadian +# Apache 2.0 +# +# This script trains a LM on the training transcriptions and corpus text. +# It is based on the example scripts distributed with PocoLM + +# It will check if pocolm is installed and if not will proceed with installation + +set -e +stage=0 +dir=data/local/local_lm +order=6 +echo "$0 $@" # Print the command line for logging +. ./utils/parse_options.sh || exit 1; + +lm_dir=${dir}/data + + +mkdir -p $dir +. ./path.sh || exit 1; # for KALDI_ROOT +export PATH=$KALDI_ROOT/tools/pocolm/scripts:$PATH +( # First make sure the pocolm toolkit is installed. + cd $KALDI_ROOT/tools || exit 1; + if [ -d pocolm ]; then + echo Not installing the pocolm toolkit since it is already there. 
+ else + echo "$0: Please install the PocoLM toolkit with: " + echo " cd ../../../tools; extras/install_pocolm.sh; cd -" + exit 1; + fi +) || exit 1; + +bypass_metaparam_optim_opt= +# If you want to bypass the metaparameter optimization steps with specific metaparameters +# un-comment the following line, and change the numbers to some appropriate values. +# You can find the values from output log of train_lm.py. +# These example numbers of metaparameters is for 4-gram model (with min-counts) +# running with train_lm.py. +# The dev perplexity should be close to the non-bypassed model. +#bypass_metaparam_optim_opt="--bypass-metaparameter-optimization=0.031,0.860,0.678,0.194,0.037,0.006,0.928,0.712,0.454,0.220,0.926,0.844,0.749,0.358,0.966,0.879,0.783,0.544,0.966,0.826,0.674,0.450" +# Note: to use these example parameters, you may need to remove the .done files +# to make sure the make_lm_dir.py be called and tain only 3-gram model +#for order in 3; do +#rm -f ${lm_dir}/${num_word}_${order}.pocolm/.done + +if [ $stage -le 0 ]; then + mkdir -p ${dir}/data + mkdir -p ${dir}/data/text + + echo "$0: Getting the Data sources" + + rm ${dir}/data/text/* 2>/dev/null || true + + # use the validation data as the dev set. + # Note: the name 'dev' is treated specially by pocolm, it automatically + # becomes the dev set. + + cat data/local/text/cleaned/bpe_val.txt > ${dir}/data/text/dev.txt + # use the training data as an additional data source. + # we can later fold the dev data into this. + cat data/train/text | cut -d " " -f 2- > ${dir}/data/text/train.txt + cat data/local/text/cleaned/bpe_corpus.txt > ${dir}/data/text/corpus_text.txt + # for reporting perplexities, we'll use the "real" dev set. + # (the validation data is used as ${dir}/data/text/dev.txt to work + # out interpolation weights.) + # note, we can't put it in ${dir}/data/text/, because then pocolm would use + # it as one of the data sources. + cut -d " " -f 2- < data/test/text > ${dir}/data/real_dev_set.txt + + # get the wordlist from train and corpus text + cat ${dir}/data/text/{train,corpus_text}.txt | tr '[:space:]' '[\n*]' | grep -v "^\s*$" | sort | uniq -c | sort -bnr > ${dir}/data/word_count + cat ${dir}/data/word_count | awk '{print $2}' > ${dir}/data/wordlist +fi + +if [ $stage -le 1 ]; then + # decide on the vocabulary. + # Note: you'd use --wordlist if you had a previously determined word-list + # that you wanted to use. + # Note: if you have more than one order, use a certain amount of words as the + # vocab and want to restrict max memory for 'sort', + echo "$0: training the unpruned LM" + min_counts='train=1' + wordlist=${dir}/data/wordlist + + lm_name="`basename ${wordlist}`_${order}" + if [ -n "${min_counts}" ]; then + lm_name+="_`echo ${min_counts} | tr -s "[:blank:]" "_" | tr "=" "-"`" + fi + unpruned_lm_dir=${lm_dir}/${lm_name}.pocolm + train_lm.py --wordlist=${wordlist} --num-splits=20 --warm-start-ratio=20 \ + --limit-unk-history=true \ + ${bypass_metaparam_optim_opt} \ + ${dir}/data/text ${order} ${lm_dir}/work ${unpruned_lm_dir} + + get_data_prob.py ${dir}/data/real_dev_set.txt ${unpruned_lm_dir} 2>&1 | grep -F '[perplexity' + mkdir -p ${dir}/data/arpa + format_arpa_lm.py ${unpruned_lm_dir} | gzip -c > ${dir}/data/arpa/${order}gram_unpruned.arpa.gz +fi + +if [ $stage -le 2 ]; then + echo "$0: pruning the LM (to larger size)" + # Using 10 million n-grams for a big LM for rescoring purposes. 
+ size=10000000 + prune_lm_dir.py --target-num-ngrams=$size --initial-threshold=0.02 ${unpruned_lm_dir} ${dir}/data/lm_${order}_prune_big + + get_data_prob.py ${dir}/data/real_dev_set.txt ${dir}/data/lm_${order}_prune_big 2>&1 | grep -F '[perplexity' + #[perplexity = 22.0613098868] over 151116.0 words + mkdir -p ${dir}/data/arpa + format_arpa_lm.py ${dir}/data/lm_${order}_prune_big | gzip -c > ${dir}/data/arpa/${order}gram_big.arpa.gz +fi + +if [ $stage -le 3 ]; then + echo "$0: pruning the LM (to smaller size)" + # Using 2 million n-grams for a smaller LM for graph building. Prune from the + # bigger-pruned LM, it'll be faster. + size=2000000 + prune_lm_dir.py --target-num-ngrams=$size ${dir}/data/lm_${order}_prune_big ${dir}/data/lm_${order}_prune_small + + get_data_prob.py ${dir}/data/real_dev_set.txt ${dir}/data/lm_${order}_prune_small 2>&1 | grep -F '[perplexity' + #[perplexity = 23.4801171202] over 151116.0 words + format_arpa_lm.py ${dir}/data/lm_${order}_prune_small | gzip -c > ${dir}/data/arpa/${order}gram_small.arpa.gz +fi diff --git a/egs/yomdle_russian/v1/local/wer_output_filter b/egs/yomdle_russian/v1/local/wer_output_filter new file mode 100755 index 00000000000..59e364e0231 --- /dev/null +++ b/egs/yomdle_russian/v1/local/wer_output_filter @@ -0,0 +1,17 @@ +#!/usr/bin/env python3 + +# Copyright 2017 Hossein Hadian + +# Apache 2.0 +# This script converts a BPE-encoded text to normal text. It is used in scoring + +import sys, io +import string +infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') +output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') +for line in infile: + words = line.strip().split() + uttid = words[0] + transcript = ''.join(words[1:]) + transcript = transcript.replace('|', ' ') + output.write(uttid + ' ' + transcript + '\n') diff --git a/egs/yomdle_russian/v1/local/yomdle b/egs/yomdle_russian/v1/local/yomdle new file mode 120000 index 00000000000..2c4544c1399 --- /dev/null +++ b/egs/yomdle_russian/v1/local/yomdle @@ -0,0 +1 @@ +../../../yomdle_tamil/v1/local/yomdle/ \ No newline at end of file diff --git a/egs/yomdle_russian/v1/path.sh b/egs/yomdle_russian/v1/path.sh new file mode 100755 index 00000000000..2d17b17a84a --- /dev/null +++ b/egs/yomdle_russian/v1/path.sh @@ -0,0 +1,6 @@ +export KALDI_ROOT=`pwd`/../../.. +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/tools/config/common_path.sh +export LC_ALL=C diff --git a/egs/yomdle_russian/v1/run_end2end.sh b/egs/yomdle_russian/v1/run_end2end.sh new file mode 100755 index 00000000000..12beebeaa05 --- /dev/null +++ b/egs/yomdle_russian/v1/run_end2end.sh @@ -0,0 +1,186 @@ +#!/bin/bash + +# Copyright 2018 Hossein Hadian +# Ashish Arora +# Jonathan Chang +# Apache 2.0 + +set -e +stage=0 +nj=30 + +language_main=Russian +slam_dir=/export/corpora5/slam/SLAM/ +yomdle_dir=/export/corpora5/slam/YOMDLE/ +corpus_dir=/export/corpora5/handwriting_ocr/corpus_data/ru/ +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +./local/check_tools.sh +# Start from stage=-2 for data preparation. This stage stores line images, +# csv files and splits{train,test,train_unsup} data/download/truth_line_image, +# data/download/truth_csv and data/local/splits respectively. 
+if [ $stage -le -2 ]; then + echo "$0: $(date): preparing data, obtaining line images and csv files..." + local/yomdle/create_download_dir.sh --language_main $language_main \ + --slam_dir $slam_dir --yomdle_dir $yomdle_dir +fi + +if [ $stage -le -1 ]; then + echo "$0: $(date): getting corpus text for language modelling..." + mkdir -p data/local/text/cleaned + cat $corpus_dir/* > data/local/text/ru.txt + head -20000 data/local/text/ru.txt > data/local/text/cleaned/val.txt + tail -n +20000 data/local/text/ru.txt > data/local/text/cleaned/corpus.txt +fi + +mkdir -p data/{train,test}/data +if [ $stage -le 0 ]; then + echo "$0: stage 0: Processing train and test data.$(date)" + echo "$0: creating text, images.scp, utt2spk and spk2utt" + #local/prepare_data.sh data/download/ + for set in train test; do + local/process_data.py data/download/ \ + data/local/splits/${set}.txt data/${set} + image/fix_data_dir.sh data/${set} + done +fi + +if [ $stage -le 1 ]; then + echo "$0: $(date) stage 1: getting allowed image widths for e2e training..." + image/get_image2num_frames.py --feat-dim 40 data/train + image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train + for set in train test; do + echo "$0: $(date) Extracting features, creating feats.scp file" + local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} + steps/compute_cmvn_stats.sh data/${set} || exit 1; + done + image/fix_data_dir.sh data/train +fi + +if [ $stage -le 3 ]; then + echo "$0: $(date) stage 3: BPE preparation" + # getting non-silence phones. + cut -d' ' -f2- data/train/text | \ +python3 <( +cat << "END" +import os, sys, io; +infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8'); +output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8'); +phone_dict = dict(); +for line in infile: + line_vect = line.strip().split(); + for word in line_vect: + for phone in word: + phone_dict[phone] = phone; + +for phone in phone_dict.keys(): + output.write(phone+ '\n'); +END + ) > data/local/text/cleaned/phones.txt + + cut -d' ' -f2- data/train/text > data/local/text/cleaned/train.txt + + echo "$0: learning BPE..." + # it is currently learned with only training text but we can also use all corpus text + # to learn BPE. phones are added so that one isolated occurance of every phone exists. + cat data/local/text/cleaned/phones.txt data/local/text/cleaned/train.txt | \ + utils/lang/bpe/prepend_words.py | utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt || exit 1; +fi + +if [ $stage -le 4 ]; then + echo "$0: $(date) stage 4: applying BPE..." + echo "$0: applying BPE on train, test text..." + for set in test train; do + cut -d' ' -f1 data/$set/text > data/$set/ids + cut -d' ' -f2- data/$set/text | utils/lang/bpe/prepend_words.py | \ + utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt | \ + sed 's/@@//g' > data/$set/bpe_text + mv data/$set/text data/$set/text.old + paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text + rm -f data/$set/bpe_text data/$set/ids + done + + echo "$0: applying BPE to corpus text..." + cat data/local/text/cleaned/corpus.txt | utils/lang/bpe/prepend_words.py | \ + utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt | \ + sed 's/@@//g' > data/local/text/cleaned/bpe_corpus.txt + cat data/local/text/cleaned/val.txt | utils/lang/bpe/prepend_words.py | \ + utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt | \ + sed 's/@@//g' > data/local/text/cleaned/bpe_val.txt +fi + +if [ $stage -le 5 ]; then + echo "$0: $(date) stage 5: Preparing dictionary and lang..." 
+ local/prepare_dict.sh --dir data/local/dict + utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 4 --sil-prob 0.0 --position-dependent-phones false \ + data/local/dict "" data/lang/temp data/lang + utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang +fi + +if [ $stage -le 6 ]; then + echo "$0: $(date) stage 6: Calling the flat-start chain recipe..." + local/chain/run_e2e_cnn.sh +fi + +if [ $stage -le 7 ]; then + echo "$0: $(date) stage 7: Aligning the training data using the e2e chain model..." + steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ + --scale-opts '--transition-scale=1.0 --acoustic-scale=1.0 --self-loop-scale=1.0' \ + data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train +fi + +chunk_width='340,300,200,100' +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g +if [ $stage -le 8 ]; then + echo "$0: $(date) stage 8: Building a tree and training a regular chain model using the e2e alignments..." + local/chain/run_cnn_e2eali.sh --chunk_width $chunk_width +fi + +if [ $stage -le 9 ]; then + echo "$0: $(date) stage 9: Estimating a language model for decoding..." + local/train_lm.sh + utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_small.arpa.gz \ + data/local/dict/lexicon.txt data/lang + utils/build_const_arpa_lm.sh data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ + data/lang data/lang_rescore_6g +fi + +if [ $stage -le 10 ] && $decode_e2e; then + echo "$0: $(date) stage 10: decoding end2end setup..." + + utils/mkgraph.sh \ + --self-loop-scale 1.0 $lang_decode \ + exp/chain/e2e_cnn_1a/ exp/chain/e2e_cnn_1a/graph || exit 1; + + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 30 --cmd "$cmd" --beam 12 \ + exp/chain/e2e_cnn_1a/graph data/test exp/chain/e2e_cnn_1a/decode_test || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/test exp/chain/e2e_cnn_1a/decode_test{,_rescored} || exit 1 + + echo "$0: Done. Date: $(date). Results:" + local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ +fi + +if [ $stage -le 11 ] && $decode_chain; then + echo "$0: $(date) stage 11: decoding chain alignment setup..." + + utils/mkgraph.sh \ + --self-loop-scale 1.0 $lang_decode \ + exp/chain/cnn_e2eali_1a/ exp/chain/cnn_e2eali_1a/graph || exit 1; + + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 30 --cmd "$cmd" --beam 12 \ + exp/chain/cnn_e2eali_1a/graph data/test exp/chain/cnn_e2eali_1a/decode_test || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/test exp/chain/cnn_e2eali_1a/decode_test{,_rescored} || exit 1 + + echo "$0: Done. Date: $(date). 
Results:" + local/chain/compare_wer.sh exp/chain/cnn_e2eali_1a +fi diff --git a/egs/yomdle_russian/v1/steps b/egs/yomdle_russian/v1/steps new file mode 120000 index 00000000000..1b186770dd1 --- /dev/null +++ b/egs/yomdle_russian/v1/steps @@ -0,0 +1 @@ +../../wsj/s5/steps/ \ No newline at end of file diff --git a/egs/yomdle_russian/v1/utils b/egs/yomdle_russian/v1/utils new file mode 120000 index 00000000000..a3279dc8679 --- /dev/null +++ b/egs/yomdle_russian/v1/utils @@ -0,0 +1 @@ +../../wsj/s5/utils/ \ No newline at end of file From 7e529edfee04c93d0003eb6f7f57835c1577d4ab Mon Sep 17 00:00:00 2001 From: GoVivace Date: Mon, 21 Jan 2019 13:56:21 -0500 Subject: [PATCH 003/235] [egs] Simplify lexicon preparation in Fisher callhome Spanish (#2999) --- egs/fisher_callhome_spanish/s5/local/merge_lexicons.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/egs/fisher_callhome_spanish/s5/local/merge_lexicons.py b/egs/fisher_callhome_spanish/s5/local/merge_lexicons.py index 864b76b671b..b42eb52d20a 100755 --- a/egs/fisher_callhome_spanish/s5/local/merge_lexicons.py +++ b/egs/fisher_callhome_spanish/s5/local/merge_lexicons.py @@ -1,11 +1,11 @@ -#!/usr/bin/env python # Copyright 2014 Gaurav Kumar. Apache 2.0 +# 2018 Saikiran Valluri, GoVivace inc., Avaaya +#!/usr/bin/env python # -*- coding: utf-8 -*- # # Merges unique words from Spanish Fisher, Gigaword and the LDC spanish lexicon - from __future__ import print_function -import sys +import sys, re import json import codecs import operator @@ -17,6 +17,7 @@ uw_gigaword = tmpdir + "/es_wordlist.json" uw_LDC = ldc_lexicon + "/callhome_spanish_lexicon_970908/preferences" +filtered_letters = re.compile(u'[¡¥ª°º¿àçèëìîôö0123456789]') merged_lexicon = [] # All three lexicons are in different formats # First add the data from lexicon_fisher (A) into the dictionary @@ -55,7 +56,8 @@ ltuples = sorted(merged_lexicon) for item in ltuples: - lf.write(item + "\n") + if not item==u'ñ' and not re.search(filtered_letters, item): + lf.write(item + "\n") lf.close() From 25f09e8abfd1135ffb56ce52b9b56468db02fb12 Mon Sep 17 00:00:00 2001 From: Ashish Arora Date: Tue, 22 Jan 2019 15:45:05 -0500 Subject: [PATCH 004/235] [egs] Update GALE Arabic recipe (#2934) --- egs/gale_arabic/s5b/RESULTS | 22 ++ egs/gale_arabic/s5b/cmd.sh | 6 +- .../s5b/local/chain/compare_wer.sh | 72 +++++ .../s5b/local/chain/run_chain_common.sh | 82 +++++ .../s5b/local/chain/tuning/run_tdnn_1a.sh | 287 +++++++++--------- .../s5b/local/gale_data_prep_audio.sh | 32 -- .../s5b/local/gale_data_prep_split.sh | 39 --- .../s5b/local/gale_data_prep_txt.sh | 60 ---- egs/gale_arabic/s5b/local/gale_format_data.sh | 60 ---- .../s5b/local/gale_prep_grapheme_dict.sh | 41 --- egs/gale_arabic/s5b/local/gale_train_lms.sh | 81 ----- .../s5b/local/nnet3/run_ivector_common.sh | 138 +++------ egs/gale_arabic/s5b/local/prepare_data.sh | 104 +++++++ egs/gale_arabic/s5b/local/prepare_dict.sh | 48 +++ egs/gale_arabic/s5b/local/prepare_lexicon.py | 26 ++ egs/gale_arabic/s5b/local/prepare_lm.sh | 51 ++++ egs/gale_arabic/s5b/local/score.sh | 60 +--- egs/gale_arabic/s5b/local/wer_output_filter | 19 ++ egs/gale_arabic/s5b/run.sh | 262 +++++++--------- 19 files changed, 725 insertions(+), 765 deletions(-) create mode 100755 egs/gale_arabic/s5b/local/chain/compare_wer.sh create mode 100755 egs/gale_arabic/s5b/local/chain/run_chain_common.sh delete mode 100755 egs/gale_arabic/s5b/local/gale_data_prep_audio.sh delete mode 100755 egs/gale_arabic/s5b/local/gale_data_prep_split.sh delete mode 100755 
egs/gale_arabic/s5b/local/gale_data_prep_txt.sh delete mode 100755 egs/gale_arabic/s5b/local/gale_format_data.sh delete mode 100755 egs/gale_arabic/s5b/local/gale_prep_grapheme_dict.sh delete mode 100755 egs/gale_arabic/s5b/local/gale_train_lms.sh create mode 100755 egs/gale_arabic/s5b/local/prepare_data.sh create mode 100755 egs/gale_arabic/s5b/local/prepare_dict.sh create mode 100755 egs/gale_arabic/s5b/local/prepare_lexicon.py create mode 100755 egs/gale_arabic/s5b/local/prepare_lm.sh create mode 100755 egs/gale_arabic/s5b/local/wer_output_filter diff --git a/egs/gale_arabic/s5b/RESULTS b/egs/gale_arabic/s5b/RESULTS index 2260a106654..e0fb9d38ceb 100644 --- a/egs/gale_arabic/s5b/RESULTS +++ b/egs/gale_arabic/s5b/RESULTS @@ -65,8 +65,30 @@ Combined Results for Reports and Conversational WER: %WER 32.36 [ 22542 / 69668, 2156 ins, 4184 del, 16202 sub ] exp/tri2b_mmi/decode_it4/wer_11 %WER 32.50 [ 22640 / 69668, 2393 ins, 3956 del, 16291 sub ] exp/tri2b_mmi/decode_it3/wer_11 %WER 32.79 [ 22847 / 69668, 2407 ins, 4760 del, 15680 sub ] exp/tri2b_mpe/decode_it3/wer_13 +# WER with train_sat_basis +%WER 33.35 [ 23233 / 69668, 2385 ins, 5274 del, 15574 sub ] exp/tri3b/decode/wer_16_0.5 +# WER with train_sat %WER 33.61 [ 23413 / 69668, 2817 ins, 4577 del, 16019 sub ] exp/tri3b/decode/wer_17 %WER 35.73 [ 24894 / 69668, 2630 ins, 4944 del, 17320 sub ] exp/tri3b/decode.si/wer_15 %WER 36.17 [ 25196 / 69668, 2429 ins, 5393 del, 17374 sub ] exp/tri2b/decode/wer_16 %WER 39.42 [ 27462 / 69668, 2473 ins, 6051 del, 18938 sub ] exp/tri2a/decode/wer_14 %WER 40.35 [ 28113 / 69668, 2713 ins, 5635 del, 19765 sub ] exp/tri1/decode/wer_13 + + +# Effect of GMM seed model (tri2b instead of tri3b). Using tri3b give a slightly better result +# as compared to using tri2b as seed. +%WER 16.66 [ 11610 / 69668, 1233 ins, 2747 del, 7630 sub ] exp/chain/tdnn_1a_3b_sp/decode_test/wer_10_0.0 +%WER 16.71 [ 11642 / 69668, 1145 ins, 2908 del, 7589 sub ] exp/chain/tdnn_1a_2b_sp/decode_test/wer_9_0.0 + +# Effect of Tree-size (3500, 4500, 7000, 11000) +%WER 16.66 [ 11610 / 69668, 1233 ins, 2747 del, 7630 sub ] exp/chain/tdnn_1a_3500_sp/decode_test/wer_10_0.0 +%WER 16.59 [ 11557 / 69668, 1234 ins, 2646 del, 7677 sub ] exp/chain/tdnn_1a_4500_sp/decode_test/wer_10_0.0 +%WER 16.47 [ 11474 / 69668, 1421 ins, 2207 del, 7846 sub ] exp/chain/tdnn_1a_7000_sp/decode_test/wer_9_0.0 +%WER 16.62 [ 11580 / 69668, 1164 ins, 2789 del, 7627 sub ] exp/chain/tdnn_1a_11000_sp/decode_test/wer_10_0.0 + +# Effect of l2-regularization on the output with tree-size=7000. l2 on the output (0.005,0.002) +%WER 16.54 [ 11522 / 69668, 1123 ins, 2739 del, 7660 sub ] exp/chain/tdnn_1a_7000_005_sp/decode_test/wer_9_0.5 +%WER 16.47 [ 11474 / 69668, 1421 ins, 2207 del, 7846 sub ] exp/chain/tdnn_1a_7000_002_sp/decode_test/wer_9_0.0 + +#current best 'chain' models (see local/chain/tuning/run_tdnn_1a.sh) +%WER 16.47 [ 11474 / 69668, 1421 ins, 2207 del, 7846 sub ] exp/chain/tdnn_1a_sp/decode_test/wer_9_0.0 diff --git a/egs/gale_arabic/s5b/cmd.sh b/egs/gale_arabic/s5b/cmd.sh index 71dd849a93b..ea341c98d4a 100755 --- a/egs/gale_arabic/s5b/cmd.sh +++ b/egs/gale_arabic/s5b/cmd.sh @@ -10,6 +10,6 @@ # conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, # or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. 
-export train_cmd="queue.pl --mem 2G" -export decode_cmd="queue.pl --mem 4G" -export mkgraph_cmd="queue.pl --mem 8G" +export train_cmd="retry.pl queue.pl --mem 2G" +export decode_cmd="retry.pl queue.pl --mem 4G" +export mkgraph_cmd="retry.pl queue.pl --mem 8G" diff --git a/egs/gale_arabic/s5b/local/chain/compare_wer.sh b/egs/gale_arabic/s5b/local/chain/compare_wer.sh new file mode 100755 index 00000000000..1a40523355a --- /dev/null +++ b/egs/gale_arabic/s5b/local/chain/compare_wer.sh @@ -0,0 +1,72 @@ +#!/bin/bash + +# this script is used for comparing decoding results between systems. +# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b} + +# ./local/chain/compare_wer.sh exp/chain/cnn1a +# System cnn1a +# WER 0.61 +# CER 0.15 +# Final train prob -0.0377 +# Final valid prob -0.0380 +# Final train prob (xent) -0.0830 +# Final valid prob (xent) -0.0838 + +if [ $# == 0 ]; then + echo "Usage: $0: [ ... ]" + echo "e.g.: $0 exp/chain/cnn{1a,1b}" + exit 1 +fi + +echo "# $0 $*" +used_epochs=false + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +echo -n "# WER " +for x in $*; do + wer=$(cat $x/decode_test/scoring_kaldi/best_wer | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# CER " +for x in $*; do + cer=$(cat $x/decode_test/scoring_kaldi/best_cer | awk '{print $2}') + printf "% 10s" $cer +done +echo + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. +fi + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train prob (xent) " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob (xent) " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo diff --git a/egs/gale_arabic/s5b/local/chain/run_chain_common.sh b/egs/gale_arabic/s5b/local/chain/run_chain_common.sh new file mode 100755 index 00000000000..da37e148441 --- /dev/null +++ b/egs/gale_arabic/s5b/local/chain/run_chain_common.sh @@ -0,0 +1,82 @@ +#!/bin/bash + +# this script has common stages shared across librispeech chain recipes. +# It generates a new topology in a new lang directory, gets the alignments as +# lattices, and builds a tree for the new topology +set -e + +stage=11 + +# input directory names. These options are actually compulsory, and they have +# been named for convenience +gmm_dir= +ali_dir= +lores_train_data_dir= + +num_leaves=6000 + +# output directory names. They are also compulsory. +lang= +lat_dir= +tree_dir= +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + +[ -z $lang ] && echo "Set --lang, this specifies the new lang directory which will have the new topology" && exit 1; +[ -z $lat_dir ] && echo "Set --lat-dir, this specifies the experiment directory to store lattice" && exit 1; +[ -z $tree_dir ] && echo "Set --tree-dir, this specifies the directory to store new tree " && exit 1; + +for f in $gmm_dir/final.mdl $ali_dir/ali.1.gz $lores_train_data_dir/feats.scp; do + [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1 +done + +if [ $stage -le 11 ]; then + echo "$0: creating lang directory with one state per phone." + # Create a version of the lang/ directory that has one state per phone in the + # topo file. [note, it really has two states.. the first one is only repeated + # once, the second one has zero or more repeats.] + if [ -d $lang ]; then + if [ $lang/L.fst -nt data/lang/L.fst ]; then + echo "$0: $lang already exists, not overwriting it; continuing" + else + echo "$0: $lang already exists and seems to be older than data/lang..." + echo " ... not sure what to do. Exiting." + exit 1; + fi + else + cp -r data/lang $lang + silphonelist=$(cat $lang/phones/silence.csl) || exit 1; + nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1; + # Use our special topology... note that later on may have to tune this + # topology. + steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo + fi +fi + +if [ $stage -le 12 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + nj=$(cat ${ali_dir}/num_jobs) || exit 1; + steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \ + $lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 13 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" $num_leaves ${lores_train_data_dir} $lang $ali_dir $tree_dir +fi + +exit 0; diff --git a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh index 7afafb31ff6..a3ccfda04ac 100755 --- a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh @@ -1,31 +1,51 @@ #!/bin/bash -#started from tedlium recipe with few edits +# ./local/chain/compare_wer.sh exp/chain/tdnn_1a_sp +# System tdnn_1a_sp +# WER 16.47 +# CER 6.68 +# Final train prob -0.0652 +# Final valid prob -0.0831 +# Final train prob (xent) -0.8965 +# Final valid prob (xent) -0.9964 +# steps/info/chain_dir_info.pl exp/chain/tdnn_1a_sp/ +# exp/chain/tdnn_1a_sp/: num-iters=441 nj=3..16 num-params=18.6M dim=40+100->5816 combine=-0.063->-0.062 (over 6) xent:train/valid[293,440,final]=(-1.22,-0.912,-0.896/-1.29,-1.01,-0.996) logprob:train/valid[293,440,final]=(-0.097,-0.066,-0.065/-0.108,-0.084,-0.083) -set -e -o pipefail -# First the options that are passed through to run_ivector_common.sh -# (some of which are also used in this script directly). 
+set -e -o pipefail stage=0 nj=30 -decode_nj=30 -min_seg_len=1.55 -xent_regularize=0.1 train_set=train -gmm=tri2b # the gmm for the target data +test_set=test +gmm=tri3b # this is the source gmm-dir that we'll use for alignments; it + # should have alignments for the specified training data. num_threads_ubm=32 -nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned - -# The rest are configs specific to this script. Most of the parameters -# are just hardcoded at this level, in the commands below. -train_stage=-10 #default -10 -tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. -tdnn_affix=1b #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. -common_egs_dir= # you can set this to use previously dumped egs. +nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. + +# Options which are not passed through to run_ivector_common.sh +affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +common_egs_dir= +reporting_email= + +# LSTM/chain options +train_stage=-10 +xent_regularize=0.1 +dropout_schedule='0,0@0.20,0.5@0.50,0' + +# training chunk-options +chunk_width=150,110,100 +get_egs_stage=-10 + +# training options +srand=0 +remove_egs=true +run_ivector_common=true +run_chain_common=true # End configuration section. echo "$0 $@" # Print the command line for logging + . ./cmd.sh . ./path.sh . ./utils/parse_options.sh @@ -39,169 +59,162 @@ where "nvcc" is installed. EOF fi -local/nnet3/run_ivector_common.sh --stage $stage \ - --nj $nj \ - --min-seg-len $min_seg_len \ - --train-set $train_set \ - --gmm $gmm \ - --num-threads-ubm $num_threads_ubm \ - --nnet3-affix "$nnet3_affix" - - -gmm_dir=exp/$gmm -ali_dir=exp/${gmm}_ali_${train_set}_sp_comb -tree_dir=exp/chain${nnet3_affix}/tree_bi${tree_affix} -lat_dir=exp/chain${nnet3_affix}/${gmm}_${train_set}_sp_comb_lats -dir=exp/chain${nnet3_affix}/tdnn${tdnn_affix}_sp_bi -train_data_dir=data/${train_set}_sp_hires_comb -lores_train_data_dir=data/${train_set}_sp_comb -train_ivector_dir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires_comb - +if $run_ivector_common; then + local/nnet3/run_ivector_common.sh \ + --stage $stage --nj $nj \ + --train-set $train_set --gmm $gmm \ + --num-threads-ubm $num_threads_ubm \ + --nnet3-affix "$nnet3_affix" +fi -for f in $gmm_dir/final.mdl $train_data_dir/feats.scp $train_ivector_dir/ivector_online.scp \ - $lores_train_data_dir/feats.scp $ali_dir/ali.1.gz $gmm_dir/final.mdl; do +gmm_dir=exp/${gmm} +ali_dir=exp/${gmm}_ali_${train_set}_sp +lat_dir=exp/chain${nnet3_affix}/${gmm}_${train_set}_sp_lats +dir=exp/chain${nnet3_affix}/tdnn${affix}_sp +train_data_dir=data/${train_set}_sp_hires +train_ivector_dir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires +lores_train_data_dir=data/${train_set}_sp + +# note: you don't necessarily have to change the treedir name +# each time you do a new experiment-- only if you change the +# configuration in a way that affects the tree. +tree_dir=exp/chain${nnet3_affix}/tree_a_sp +# the 'lang' directory is created by this script. +# If you create such a directory with a non-standard topology +# you should probably name it differently. +lang=data/lang_chain + +for f in $train_data_dir/feats.scp $train_ivector_dir/ivector_online.scp \ + $lores_train_data_dir/feats.scp $gmm_dir/final.mdl \ + $ali_dir/ali.1.gz $gmm_dir/final.mdl; do [ ! 
-f $f ] && echo "$0: expected file $f to exist" && exit 1 done -if [ $stage -le 14 ]; then - echo "$0: creating lang directory with one state per phone." - # Create a version of the lang/ directory that has one state per phone in the - # topo file. [note, it really has two states.. the first one is only repeated - # once, the second one has zero or more repeats.] - if [ -d data/lang_chain ]; then - if [ data/lang_chain/L.fst -nt data/lang/L.fst ]; then - echo "$0: data/lang_chain already exists, not overwriting it; continuing" - else - echo "$0: data/lang_chain already exists and seems to be older than data/lang..." - echo " ... not sure what to do. Exiting." - exit 1; - fi - else - cp -r data/lang data/lang_chain - silphonelist=$(cat data/lang_chain/phones/silence.csl) || exit 1; - nonsilphonelist=$(cat data/lang_chain/phones/nonsilence.csl) || exit 1; - # Use our special topology... note that later on may have to tune this - # topology. - steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >data/lang_chain/topo - fi +# Please take this as a reference on how to specify all the options of +# local/chain/run_chain_common.sh +if $run_chain_common; then + local/chain/run_chain_common.sh --stage $stage \ + --gmm-dir $gmm_dir \ + --ali-dir $ali_dir \ + --lores-train-data-dir ${lores_train_data_dir} \ + --lang $lang \ + --lat-dir $lat_dir \ + --num-leaves 7000 \ + --tree-dir $tree_dir || exit 1; fi if [ $stage -le 15 ]; then - # Get the alignments as lattices (gives the chain training more freedom). - # use the same num-jobs as the alignments - steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ - data/lang $gmm_dir $lat_dir - rm $lat_dir/fsts.*.gz # save space -fi - -if [ $stage -le 16 ]; then - # Build a tree using our new topology. We know we have alignments for the - # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use - # those. - if [ -f $tree_dir/final.mdl ]; then - echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
- exit 1; - fi - steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir -fi - -if [ $stage -le 17 ]; then mkdir -p $dir - echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true" + tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" + linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" + prefinal_opts="l2-regularize=0.01" + output_opts="l2-regularize=0.002" mkdir -p $dir/configs + cat < $dir/configs/network.xconfig input dim=100 name=ivector input dim=40 name=input - # please note that it is important to have input layer with the name=input # as the layer immediately preceding the fixed-affine-layer to enable # the use of short notation for the descriptor fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - # the first splicing is moved before the lda layer, so no splicing here - relu-renorm-layer name=tdnn1 dim=450 - relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=450 - relu-renorm-layer name=tdnn3 input=Append(-1,0,1,2) dim=450 - relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=450 - relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=450 - relu-renorm-layer name=tdnn6 input=Append(-6,-3,0) dim=450 - - ## adding the layers for chain branch - relu-renorm-layer name=prefinal-chain input=tdnn6 dim=450 target-rms=0.5 - output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. 
- relu-renorm-layer name=prefinal-xent input=tdnn6 dim=450 target-rms=0.5 - output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - + relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=1536 + tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1 + tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1 + tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1 + tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=0 + tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf14 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf15 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + linear-component name=prefinal-l dim=256 $linear_opts + prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256 + output-layer name=output include-log-softmax=false dim=$num_targets $output_opts + prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts EOF steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ - fi -if [ $stage -le 18 ]; then + +if [ $stage -le 16 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/gale_arabic-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage fi - steps/nnet3/chain/train.py --stage $train_stage \ + steps/nnet3/chain/train.py --stage $train_stage \ --cmd "$decode_cmd" \ --feat.online-ivector-dir $train_ivector_dir \ --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ --chain.xent-regularize $xent_regularize \ --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ + --chain.l2-regularize 0.0 \ --chain.apply-deriv-weights false \ --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.dir "$common_egs_dir" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width 150 \ - --trainer.num-chunk-per-minibatch 128 \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs 6 \ --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs 4 \ - --trainer.optimization.num-jobs-initial 2 \ - --trainer.optimization.num-jobs-final 2 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.max-param-change 2.0 \ - --cleanup.remove-egs true \ - --feat-dir $train_data_dir \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.00025 \ + --trainer.optimization.final-effective-lrate 0.000025 \ + --trainer.num-chunk-per-minibatch=64,32 \ + --trainer.add-option="--optimization.memory-compression-level=2" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0 --constrained false" \ + --egs.stage $get_egs_stage \ + --reporting.email="$reporting_email" \ + --cleanup.remove-egs=$remove_egs \ + --feat-dir=$train_data_dir \ --tree-dir $tree_dir \ - --lat-dir $lat_dir \ - --dir $dir -fi - + --lat-dir=$lat_dir \ + --dir $dir || exit 1; +fi -if [ $stage -le 19 ]; then - # Note: it might appear that this data/lang_chain directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_test $dir $dir/graph +if [ $stage -le 17 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. 
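+  # For example (hypothetical): a data/lang_test_fg directory built from a
+  # 4-gram LM, but with the same phones.txt, could be passed here instead of
+  # data/lang_test, e.g.
+  #   utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test_fg $tree_dir $tree_dir/graph_fg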
+ + utils/lang/check_phones_compatible.sh \ + data/lang_test/phones.txt $lang/phones.txt + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_test \ + $tree_dir $tree_dir/graph || exit 1; fi -if [ $stage -le 20 ]; then +if [ $stage -le 18 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) rm $dir/.error 2>/dev/null || true - steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ - --acwt 1.0 --post-decode-acwt 10.0 \ - --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_test_hires \ - --scoring-opts "--min-lmwt 5 " \ - $dir/graph data/test_hires $dir/decode || exit 1; + + steps/nnet3/decode.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context 0 --extra-right-context 0 \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${test_set}_hires \ + $tree_dir/graph data/${test_set}_hires ${dir}/decode_${test_set} || exit 1 fi -exit 0 diff --git a/egs/gale_arabic/s5b/local/gale_data_prep_audio.sh b/egs/gale_arabic/s5b/local/gale_data_prep_audio.sh deleted file mode 100755 index 0125272d06c..00000000000 --- a/egs/gale_arabic/s5b/local/gale_data_prep_audio.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash - -# Copyright 2014 QCRI (author: Ahmed Ali) -# Apache 2.0 - - -galeData=$(utils/make_absolute.sh "${@: -1}" ); # last argumnet; the local folder -audio_dvds=${@:1:${#}-1} # all the audio dvds for GALE corpus; ; check audio=( in ../run.sh - -mkdir -p $galeData - -# check that sox is installed -which sox &>/dev/null -if [[ $? != 0 ]]; then - echo "sox is not installed"; exit 1 -fi - -for dvd in $audio_dvds; do - dvd_full_path=$(utils/make_absolute.sh $dvd) - if [[ ! 
-e $dvd_full_path ]]; then - echo missing $dvd_full_path; exit 1; - fi - find $dvd_full_path \( -name "*.wav" -o -name "*.flac" \) | while read file; do - id=$(basename $file | awk '{gsub(".wav","");gsub(".flac","");print}') - echo "$id sox $file -r 16000 -t wav - |" - done -done | sort -u > $galeData/wav.scp - -echo data prep audio succeded - -exit 0 - diff --git a/egs/gale_arabic/s5b/local/gale_data_prep_split.sh b/egs/gale_arabic/s5b/local/gale_data_prep_split.sh deleted file mode 100755 index b18a4e5b105..00000000000 --- a/egs/gale_arabic/s5b/local/gale_data_prep_split.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -# Copyright 2014 QCRI (author: Ahmed Ali) -# Apache 2.0 - -if [ $# -ne 1 ]; then - echo "Arguments should be the "; exit 1 -fi - - -#data will data/local - -galeData=$(utils/make_absolute.sh $1) -mkdir -p data/local -dir=$(utils/make_absolute.sh data/local) - - -grep -f local/test_list $galeData/all | grep -v -f local/bad_segments > $galeData/all.test -grep -v -f local/test_list $galeData/all | grep -v -f local/bad_segments > $galeData/all.train - -for x in test train; do - outdir=$dir/$x - file=$galeData/all.$x - mkdir -p $outdir - awk '{print $2 " " $2}' $file | sort -u > $outdir/utt2spk - cp -pr $outdir/utt2spk $outdir/spk2utt - awk '{print $2 " " $1 " " $3 " " $4}' $file | sort -u > $outdir/segments - awk '{printf $2 " "; for (i=5; i<=NF; i++) {printf $i " "} printf "\n"}' $file | sort -u > $outdir/text -done - - -grep -f local/test_list $galeData/wav.scp > $dir/test/wav.scp - -cat $galeData/wav.scp | awk -v seg=$dir/train/segments 'BEGIN{while((getline0) {seen[$2]=1;}} - {if (seen[$1]) { print $0}}' > $dir/train/wav.scp - -echo data prep split succeeded - -exit 0 diff --git a/egs/gale_arabic/s5b/local/gale_data_prep_txt.sh b/egs/gale_arabic/s5b/local/gale_data_prep_txt.sh deleted file mode 100755 index 04529d88ac0..00000000000 --- a/egs/gale_arabic/s5b/local/gale_data_prep_txt.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash - -# Copyright 2014 QCRI (author: Ahmed Ali) -# Apache 2.0 - -galeData=$(utils/make_absolute.sh "${@: -1}" ); # last argumnet; the local folder -txt_dvds=${@:1:${#}-1} # all the txt cds correspoding to the audio corpus; check text=( in ../run.sh - - -top_pwd=`pwd` -txtdir=$galeData/txt -mkdir -p $txtdir; cd $txtdir - -for cdx in $txt_dvds; do - echo "Preparing $cdx" - if [[ $cdx == *.tgz ]] ; then - tar -xvf $cdx - elif [ -d "$cdx" ]; then - ln -s $cdx `basename $cdx` - else - echo "I don't really know what I shall do with $cdx " >&2 - fi -done - -find -L . 
-type f -name "*.tdf" | while read file; do -sed '1,3d' $file # delete the first 3 lines -done > all.tmp$$ - -perl -e ' - ($inFile,$idFile,$txtFile)= split /\s+/, $ARGV[0]; - open(IN, "$inFile"); - open(ID, ">$idFile"); - open(TXT, ">$txtFile"); - while () { - @arr= split /\t/,$_; - $start=sprintf ("%0.3f",$arr[2]);$rStart=$start;$start=~s/\.//; $start=~s/^0+$/0/; $start=~s/^0+([^0])/$1/; # remove zeros at the beginning - $end=sprintf ("%0.3f",$arr[3]);$rEnd=$end;$end=~s/^0+([^0])/$1/;$end=~s/\.//; - if ( ($arr[11] !~ m/report/) && ($arr[11] !~ m/conversational/) ){$arr[11]="UNK";} - $id="$arr[11] $arr[0] $arr[0]_${start}_${end} $rStart $rEnd\n"; - next if ($rStart == $rEnd); - $id =~ s/.sph//g; - print ID $id; - print TXT "$arr[7]\n"; - }' "all.tmp$$ allid.tmp$$ contentall.tmp$$" - - -perl ${top_pwd}/local/normalize_transcript_BW.pl contentall.tmp$$ contentall.buck.tmp$$ - -paste allid.tmp$$ contentall.buck.tmp$$ | sed 's: $::' | awk '{if (NF>5) {print $0}}' > all_1.tmp$$ - -awk '{$1="";print $0}' all_1.tmp$$ | sed 's:^ ::' > $galeData/all -awk '{if ($1 == "report") {$1="";print $0}}' all_1.tmp$$ | sed 's:^ ::' > $galeData/report -awk '{if ($1 == "conversational") {$1="";print $0}}' all_1.tmp$$ | sed 's:^ ::' > $galeData/conversational - -cd ..; -rm -fr $txtdir -cd $top_pwd -echo data prep text succeeded - -exit 0 diff --git a/egs/gale_arabic/s5b/local/gale_format_data.sh b/egs/gale_arabic/s5b/local/gale_format_data.sh deleted file mode 100755 index b69c34e68b9..00000000000 --- a/egs/gale_arabic/s5b/local/gale_format_data.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash - -# Copyright 2014 QCRI (author: Ahmed Ali) -# Apache 2.0 - -if [ -f path.sh ]; then - . ./path.sh; else - echo "$0: missing path.sh"; exit 1; -fi - -for dir in test train; do - cp -pr data/local/$dir data/$dir -done - - -mkdir -p data/lang_test - -arpa_lm=data/local/lm/3gram-mincount/lm_unpruned.gz -[ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1; - -rm -r data/lang_test -cp -r data/lang data/lang_test - -gunzip -c "$arpa_lm" | \ - arpa2fst --disambig-symbol=#0 \ - --read-symbol-table=data/lang_test/words.txt - data/lang_test/G.fst - - -echo "$0: Checking how stochastic G is (the first of these numbers should be small):" -fstisstochastic data/lang_test/G.fst - -## Check lexicon. -## just have a look and make sure it seems sane. -echo "$0: First few lines of lexicon FST:" -fstprint --isymbols=data/lang/phones.txt --osymbols=data/lang/words.txt data/lang/L.fst | head - -echo "$0: Performing further checks" - -# Checking that G.fst is determinizable. -fstdeterminize data/lang_test/G.fst /dev/null || echo Error determinizing G. - -# Checking that L_disambig.fst is determinizable. -fstdeterminize data/lang_test/L_disambig.fst /dev/null || echo Error determinizing L. - -# Checking that disambiguated lexicon times G is determinizable -# Note: we do this with fstdeterminizestar not fstdeterminize, as -# fstdeterminize was taking forever (presumbaly relates to a bug -# in this version of OpenFst that makes determinization slow for -# some case). -fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst | \ - fstdeterminizestar >/dev/null || echo Error - -# Checking that LG is stochastic: -fsttablecompose data/lang/L_disambig.fst data/lang_test/G.fst | \ - fstisstochastic || echo LG is not stochastic - - -echo gale_format_data succeeded. 
- -exit 0 diff --git a/egs/gale_arabic/s5b/local/gale_prep_grapheme_dict.sh b/egs/gale_arabic/s5b/local/gale_prep_grapheme_dict.sh deleted file mode 100755 index 5f101f8245b..00000000000 --- a/egs/gale_arabic/s5b/local/gale_prep_grapheme_dict.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -# Copyright 2017 QCRI (author: Ahmed Ali) -# Apache 2.0 - - -# run this from ../ -dir=$(utils/make_absolute.sh data/local/dict) -mkdir -p $dir - - -# (1) Get all avaialble dictionaries, since this is a grapheme model, so we mainly need the most frequent word lists -wget http://alt.qcri.org//resources/speech/dictionary/ar-ar_grapheme_lexicon_2016-02-09.bz2 || exit 1; -wget http://alt.qcri.org//resources/speech/dictionary/ar-ar_lexicon_2014-03-17.txt.bz2 || exit 1; -bzcat ar-ar_grapheme_lexicon_2016-02-09.bz2 | sed '1,3d' | awk '{print $1}' > tmp$$ -bzcat ar-ar_lexicon_2014-03-17.txt.bz2 | sed '1,3d' | awk '{print $1}' >> tmp$$ -# (2) Now we add all the words appeared in the training data -cat data/local/train/text | cut -d ' ' -f 2- | tr -s " " "\n" | sort -u >> tmp$$ -grep -v [0-9] tmp$$ | sed -e 's:[FNKaui\~o\`]::g' -e 's:{:}:g' | sort -u > tmp1.$$ # remove vowels and rare alef wasla -cat tmp1.$$ | sed 's:\(\):\1 :g' | sed -e 's: : :g' -e 's: : :g' -e 's:\s*: :g' -e 's:\*:V:g' > tmp2.$$ -paste -d ' ' tmp1.$$ tmp2.$$ > $dir/lexicon.txt - -#(2) Dictionary preparation: - -# silence phones, one per line. -echo SIL > $dir/silence_phones.txt -echo SIL > $dir/optional_silence.txt - -# nonsilence phones; on each line is a list of phones that correspond -# really to the same base phone. -cat tmp2.$$ | tr -s ' ' '\n' | grep -v ^$ | sort -u > $dir/nonsilence_phones.txt || exit 1; - -sed -i '1i SIL' $dir/lexicon.txt # insert word with phone sil at the begining of the dictionary - -rm -fr ar-ar_lexicon_2014-03-17.txt.bz2 ar-ar_grapheme_lexicon_2016-02-09.bz2 tmp$$ tmp1.$$ tmp2.$$ -echo Dictionary preparation succeeded - -# The script is still missing dates and numbers - -exit 0 - diff --git a/egs/gale_arabic/s5b/local/gale_train_lms.sh b/egs/gale_arabic/s5b/local/gale_train_lms.sh deleted file mode 100755 index 3988ec3818f..00000000000 --- a/egs/gale_arabic/s5b/local/gale_train_lms.sh +++ /dev/null @@ -1,81 +0,0 @@ -#!/bin/bash - - -# To be run from one directory above this script. - - -lexicon=data/local/dict/lexicon.txt -[ ! -f $lexicon ] && echo "$0: No such file $lexicon" && exit 1; - - -# This script takes no arguments. It assumes you have already run -# previus steps successfully -# It takes as input the files -#data/local/train.*/text -#data/local/dict/lexicon.txt - - -export LC_ALL=C # You'll get errors about things being not sorted, if you -# have a different locale. -export PATH=$PATH:./../../../tools/kaldi_lm -( # First make sure the kaldi_lm toolkit is installed. - cd $KALDI_ROOT/tools || exit 1; - if [ -d kaldi_lm ]; then - echo Not installing the kaldi_lm toolkit since it is already there. - else - echo Downloading and installing the kaldi_lm tools - if [ ! -f kaldi_lm.tar.gz ]; then - wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1; - fi - tar -xvzf kaldi_lm.tar.gz || exit 1; - cd kaldi_lm - make || exit 1; - echo Done making the kaldi_lm tools - fi -) || exit 1; - - -dir=data/local/lm - mkdir -p $dir - text=data/local/train/text - [ ! 
-f $text ] && echo "$0: No such file $text" && exit 1; - - cleantext=$dir/text.no_oov - - cat $text | awk -v lex=$lexicon 'BEGIN{while((getline0){ seen[$1]=1; } } - {for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf(" ",$n);} } printf("\n");}' \ - > $cleantext || exit 1; - - - cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \ - sort -nr > $dir/word.counts || exit 1; - - -# Get counts from acoustic training transcripts, and add one-count -# for each word in the lexicon (but not silence, we don't want it -# in the LM-- we'll add it optionally later). - cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \ - cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \ - sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1; - -# note: we probably won't really make use of as there aren't any OOVs - cat $dir/unigram.counts | awk '{print $2}' | get_word_map.pl "" "" "" > $dir/word_map \ - || exit 1; - -# note: ignore 1st field of train.txt, it's the utterance-id. - cat $cleantext | awk -v wmap=$dir/word_map 'BEGIN{while((getline0)map[$1]=$2;} - { for(n=2;n<=NF;n++) { printf map[$n]; if(n$dir/train.gz \ - || exit 1; - - train_lm.sh --arpa --lmtype 3gram-mincount $dir || exit 1; - -# LM is small enough that we don't need to prune it (only about 0.7M N-grams). -# Perplexity over 128254.000000 words is 90.446690 - -# note: output is -# data/local/lm/3gram-mincount/lm_unpruned.gz - - -echo train lm succeeded - -exit 0 diff --git a/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh b/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh index f14c8441869..f071842dc0b 100755 --- a/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh +++ b/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh @@ -2,31 +2,29 @@ set -e -o pipefail -# This script is called from local/nnet3/run_tdnn.sh and local/chain/run_tdnn.sh (and may eventually -# be called by more scripts). It contains the common feature preparation and iVector-related parts -# of the script. See those scripts for examples of usage. +# This script is called from scripts like local/nnet3/run_tdnn.sh and +# local/chain/run_tdnn.sh (and may eventually be called by more scripts). It +# contains the common feature preparation and iVector-related parts of the +# script. See those scripts for examples of usage. stage=0 nj=100 -min_seg_len=1.55 # min length in seconds... we do this because chain training - # will discard segments shorter than 1.5 seconds. Must remain in sync - # with the same option given to prepare_lores_feats_and_alignments.sh train_set=train # you might set this to e.g. train. -gmm=tri2b # This specifies a GMM-dir from the features of the type you're training the system on; +test_sets="test" +gmm=tri3b # This specifies a GMM-dir from the features of the type you're training the system on; # it should contain alignments for 'train_set'. num_threads_ubm=32 -nnet3_affix=_cleaned # affix for exp/nnet3 directory to put iVector stuff in, so it - # becomes exp/nnet3_cleaned or whatever. +nnet3_affix= # affix for exp/nnet3 directory to put iVector stuff . ./cmd.sh . ./path.sh -. ./utils/parse_options.sh +. utils/parse_options.sh gmm_dir=exp/${gmm} -ali_dir=exp/${gmm}_ali_${train_set}_sp_comb +ali_dir=exp/${gmm}_ali_${train_set}_sp for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do if [ ! 
-f $f ]; then @@ -61,7 +59,7 @@ if [ $stage -le 2 ]; then utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/mfcc/gale_arabic-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage fi - for datadir in ${train_set}_sp test; do + for datadir in ${train_set}_sp ${test_sets}; do utils/copy_data_dir.sh data/$datadir data/${datadir}_hires done @@ -69,7 +67,7 @@ if [ $stage -le 2 ]; then # features; this helps make trained nnets more invariant to test data volume. utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires - for datadir in ${train_set}_sp test; do + for datadir in ${train_set}_sp ${test_sets}; do steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \ --cmd "$train_cmd" data/${datadir}_hires steps/compute_cmvn_stats.sh data/${datadir}_hires @@ -78,75 +76,33 @@ if [ $stage -le 2 ]; then fi if [ $stage -le 3 ]; then - echo "$0: combining short segments of speed-perturbed high-resolution MFCC training data" - # we have to combine short segments or we won't be able to train chain models - # on those segments. - utils/data/combine_short_segments.sh \ - data/${train_set}_sp_hires $min_seg_len data/${train_set}_sp_hires_comb - - # just copy over the CMVN to avoid having to recompute it. - cp data/${train_set}_sp_hires/cmvn.scp data/${train_set}_sp_hires_comb/ - utils/fix_data_dir.sh data/${train_set}_sp_hires_comb/ -fi - -if [ $stage -le 4 ]; then - echo "$0: selecting segments of hires training data that were also present in the" - echo " ... original training data." - - # note, these data-dirs are temporary; we put them in a sub-directory - # of the place where we'll make the alignments. - temp_data_root=exp/nnet3${nnet3_affix}/tri5 - mkdir -p $temp_data_root - - utils/data/subset_data_dir.sh --utt-list data/${train_set}/feats.scp \ - data/${train_set}_sp_hires $temp_data_root/${train_set}_hires - - # note: essentially all the original segments should be in the hires data. - n1=$(wc -l /dev/null +if [[ $? != 0 ]]; then + echo "$0: sox is not installed"; exit 1 +fi + +for dvd in $dir1 $dir2 $dir3; do + dvd_full_path=$(utils/make_absolute.sh $dvd) + if [[ ! -e $dvd_full_path ]]; then + echo "$0: missing $dvd_full_path"; exit 1; + fi + find $dvd_full_path \( -name "*.wav" -o -name "*.flac" \) | while read file; do + id=$(basename $file | awk '{gsub(".wav","");gsub(".flac","");print}') + echo "$id sox $file -r 16000 -t wav - |" + done +done | sort -u > $gale_data/wav.scp +echo "$0:data prep audio succeded" + +gale_data=$(utils/make_absolute.sh "GALE" ); +top_pwd=`pwd` +txtdir=$gale_data/txt +mkdir -p $txtdir; cd $txtdir + +for cdx in $text1 $text2 $text3; do + echo "$0:Preparing $cdx" + if [[ $cdx == *.tgz ]] ; then + tar -xvf $cdx + elif [ -d "$cdx" ]; then + ln -s $cdx `basename $cdx` + else + echo "$0:I don't really know what I shall do with $cdx " >&2 + fi +done + +find -L . 
-type f -name "*.tdf" | while read file; do +sed '1,3d' $file # delete the first 3 lines +done > all.tmp$$ + +perl -e ' + ($inFile,$idFile,$txtFile)= split /\s+/, $ARGV[0]; + open(IN, "$inFile"); + open(ID, ">$idFile"); + open(TXT, ">$txtFile"); + while () { + @arr= split /\t/,$_; + $start=sprintf ("%0.3f",$arr[2]);$rStart=$start;$start=~s/\.//; $start=~s/^0+$/0/; $start=~s/^0+([^0])/$1/; # remove zeros at the beginning + $end=sprintf ("%0.3f",$arr[3]);$rEnd=$end;$end=~s/^0+([^0])/$1/;$end=~s/\.//; + if ( ($arr[11] !~ m/report/) && ($arr[11] !~ m/conversational/) ){$arr[11]="UNK";} + $id="$arr[11] $arr[0] $arr[0]_${start}_${end} $rStart $rEnd\n"; + next if ($rStart == $rEnd); + $id =~ s/.sph//g; + print ID $id; + print TXT "$arr[7]\n"; + }' "all.tmp$$ allid.tmp$$ contentall.tmp$$" + +perl ${top_pwd}/local/normalize_transcript_BW.pl contentall.tmp$$ contentall.buck.tmp$$ +paste allid.tmp$$ contentall.buck.tmp$$ | sed 's: $::' | awk '{if (NF>5) {print $0}}' > all_1.tmp$$ + + +awk '{$1="";print $0}' all_1.tmp$$ | sed 's:^ ::' > $gale_data/all +awk '{if ($1 == "report") {$1="";print $0}}' all_1.tmp$$ | sed 's:^ ::' > $gale_data/report +awk '{if ($1 == "conversational") {$1="";print $0}}' all_1.tmp$$ | sed 's:^ ::' > $gale_data/conversational + +cd ..; +rm -fr $txtdir +cd $top_pwd +echo "$0:dat a prep text succeeded" + +mkdir -p data +dir=$(utils/make_absolute.sh data/) +grep -f local/test_list $gale_data/all | grep -v -f local/bad_segments > $gale_data/all.test +grep -v -f local/test_list $gale_data/all | grep -v -f local/bad_segments > $gale_data/all.train + +for x in test train; do + outdir=data/$x + file=$gale_data/all.$x + mkdir -p $outdir + awk '{print $2 " " $2}' $file | sort -u > $outdir/utt2spk + cp -pr $outdir/utt2spk $outdir/spk2utt + awk '{print $2 " " $1 " " $3 " " $4}' $file | sort -u > $outdir/segments + awk '{printf $2 " "; for (i=5; i<=NF; i++) {printf $i " "} printf "\n"}' $file | sort -u > $outdir/text +done + +grep -f local/test_list $gale_data/wav.scp > $dir/test/wav.scp + +cat $gale_data/wav.scp | awk -v seg=$dir/train/segments 'BEGIN{while((getline0) {seen[$2]=1;}} + {if (seen[$1]) { print $0}}' > $dir/train/wav.scp + +echo "$0:data prep split succeeded" +exit 0 diff --git a/egs/gale_arabic/s5b/local/prepare_dict.sh b/egs/gale_arabic/s5b/local/prepare_dict.sh new file mode 100755 index 00000000000..47b5869fdf1 --- /dev/null +++ b/egs/gale_arabic/s5b/local/prepare_dict.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash + +# Copyright 2017 QCRI (author: Ahmed Ali) +# Apache 2.0 +# This script prepares the dictionary. + +set -e +dir=data/local/dict +lexicon_url1="http://alt.qcri.org//resources/speech/dictionary/ar-ar_grapheme_lexicon_2016-02-09.bz2"; +lexicon_url2="http://alt.qcri.org//resources/speech/dictionary/ar-ar_lexicon_2014-03-17.txt.bz2"; +stage=0 +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh || exit 1; +mkdir -p $dir data/local/lexicon_data + +if [ $stage -le 0 ]; then + echo "$0: Downloading text for lexicon... $(date)." 
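+  # Only the first column (the headwords) of the two QCRI lexicons is kept;
+  # all words seen in the training transcripts are appended as well.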
+ wget -P data/local/lexicon_data $lexicon_url1 + wget -P data/local/lexicon_data $lexicon_url2 + bzcat data/local/lexicon_data/ar-ar_grapheme_lexicon_2016-02-09.bz2 | sed '1,3d' | awk '{print $1}' > data/local/lexicon_data/grapheme_lexicon + bzcat data/local/lexicon_data/ar-ar_lexicon_2014-03-17.txt.bz2 | sed '1,3d' | awk '{print $1}' >> data/local/lexicon_data/grapheme_lexicon + cat data/train/text | cut -d ' ' -f 2- | tr -s " " "\n" | sort -u >> data/local/lexicon_data/grapheme_lexicon +fi + + +if [ $stage -le 0 ]; then + echo "$0: processing lexicon text and creating lexicon... $(date)." + # remove vowels and rare alef wasla + grep -v [0-9] data/local/lexicon_data/grapheme_lexicon | sed -e 's:[FNKaui\~o\`]::g' -e 's:{:}:g' | sort -u > data/local/lexicon_data/processed_lexicon + local/prepare_lexicon.py +fi + +cut -d' ' -f2- $dir/lexicon.txt | sed 's/SIL//g' | tr ' ' '\n' | sort -u | sed '/^$/d' >$dir/nonsilence_phones.txt || exit 1; + +sed -i '1i UNK' $dir/lexicon.txt + +echo UNK >> $dir/nonsilence_phones.txt + +echo ' SIL' >> $dir/lexicon.txt + +echo SIL > $dir/silence_phones.txt + +echo SIL >$dir/optional_silence.txt + +echo -n "" >$dir/extra_questions.txt + +echo "$0: Dictionary preparation succeeded" diff --git a/egs/gale_arabic/s5b/local/prepare_lexicon.py b/egs/gale_arabic/s5b/local/prepare_lexicon.py new file mode 100755 index 00000000000..215541585eb --- /dev/null +++ b/egs/gale_arabic/s5b/local/prepare_lexicon.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 + +# Copyright 2018 Ashish Arora +# Apache 2.0 + +# This script prepares lexicon. + +import argparse +import os + +parser = argparse.ArgumentParser(description="""Creates the list of characters and words in lexicon""") +args = parser.parse_args() + +### main ### +lex = {} +text_path = os.path.join('data','local', 'lexicon_data', 'processed_lexicon') +with open(text_path, 'r', encoding='utf-8') as f: + for line in f: + line = line.strip() + characters = list(line) + characters = " ".join(['V' if char == '*' else char for char in characters]) + lex[line] = characters + +with open(os.path.join('data','local','dict', 'lexicon.txt'), 'w', encoding='utf-8') as fp: + for key in sorted(lex): + fp.write(key + " " + lex[key] + "\n") diff --git a/egs/gale_arabic/s5b/local/prepare_lm.sh b/egs/gale_arabic/s5b/local/prepare_lm.sh new file mode 100755 index 00000000000..6fdf35f471a --- /dev/null +++ b/egs/gale_arabic/s5b/local/prepare_lm.sh @@ -0,0 +1,51 @@ +#!/bin/bash + +# Copyright 2012 Vassil Panayotov +# 2017 Ewald Enzinger +# Apache 2.0 + +. ./path.sh || exit 1 + +echo "=== Building a language model ..." + +dir=data/local/lm/ +text=data/train/text +lexicon=data/local/dict/lexicon.txt +# Language model order +order=3 + +. utils/parse_options.sh + +# Prepare a LM training corpus from the transcripts +mkdir -p $dir + +for f in "$text" "$lexicon"; do + [ ! -f $f ] && echo "$0: No such file $f" && exit 1; +done + +loc=`which ngram-count`; +if [ -z $loc ]; then + if uname -a | grep 64 >/dev/null; then # some kind of 64 bit... + sdir=$KALDI_ROOT/tools/srilm/bin/i686-m64 + else + sdir=$KALDI_ROOT/tools/srilm/bin/i686 + fi + if [ -f $sdir/ngram-count ]; then + echo Using SRILM tools from $sdir + export PATH=$PATH:$sdir + else + echo You appear to not have SRILM tools installed, either on your path, + echo or installed in $sdir. See tools/install_srilm.sh for installation + echo instructions. 
+ exit 1 + fi +fi + +cat data/train/text | cut -d " " -f 2- > $dir/text.txt +cut -d' ' -f1 $lexicon > $dir/wordlist + +ngram-count -text $dir/text.txt -order $order -limit-vocab -vocab $dir/wordlist \ + -unk -map-unk "" -kndiscount -interpolate -lm $dir/lm.gz + +#ngram -lm $dir/lm.gz -ppl $dir/dev.txt +echo "*** Finished building the LM model!" diff --git a/egs/gale_arabic/s5b/local/score.sh b/egs/gale_arabic/s5b/local/score.sh index 83366f7c7fc..1d84815fc69 100755 --- a/egs/gale_arabic/s5b/local/score.sh +++ b/egs/gale_arabic/s5b/local/score.sh @@ -1,60 +1,6 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - -[ -f ./path.sh ] && . ./path.sh - -# begin configuration section. -cmd=run.pl -stage=0 -decode_mbr=true -word_ins_penalty=0.0 -min_lmwt=7 -max_lmwt=17 -iter= #some of the scripts from steps/ seem to use it -#end configuration section. - -echo "$0 $#" - -[ -f ./path.sh ] && . ./path.sh -. parse_options.sh || exit 1; - -if [ $# -ne 3 ]; then - echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] " - echo " Options:" - echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." - echo " --stage (0|1|2) # start scoring script from part-way through." - echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)." - echo " --min_lmwt # minumum LM-weight for lattice rescoring " - echo " --max_lmwt # maximum LM-weight for lattice rescoring " - exit 1; -fi -data=$1 -lang_or_graph=$2 -dir=$3 - -symtab=$lang_or_graph/words.txt - -for f in $symtab $dir/lat.1.gz $data/text; do - [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1; -done - -mkdir -p $dir/scoring/log - -cat $data/text | sed 's:::g' | sed 's:::g' > $dir/scoring/test_filt.txt - -$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \ - lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ - lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ - lattice-best-path --word-symbol-table=$symtab \ - ark:- ark,t:$dir/scoring/LMWT.tra || exit 1; +#!/bin/bash -# Note: the double level of quoting for the sed command -$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ - cat $dir/scoring/LMWT.tra \| \ - utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ - compute-wer --text --mode=present \ - ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT || exit 1; -exit 0; +steps/scoring/score_kaldi_wer.sh "$@" +steps/scoring/score_kaldi_cer.sh --stage 2 "$@" diff --git a/egs/gale_arabic/s5b/local/wer_output_filter b/egs/gale_arabic/s5b/local/wer_output_filter new file mode 100755 index 00000000000..cf48b434144 --- /dev/null +++ b/egs/gale_arabic/s5b/local/wer_output_filter @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 + +# Copyright 2017 Hossein Hadian + +# Apache 2.0 +# This script converts a BPE-encoded text to normal text. It is used in scoring + +import sys, io +import string + +infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') +output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') + +for line in infile: + words = line.strip().split() + words = [word for word in words if '' not in word] + uttid = words[0] + transcript = ' '.join(words[1:]) + output.write(uttid + ' ' + transcript + '\n') diff --git a/egs/gale_arabic/s5b/run.sh b/egs/gale_arabic/s5b/run.sh index c45f5119949..3f12d22495e 100755 --- a/egs/gale_arabic/s5b/run.sh +++ b/egs/gale_arabic/s5b/run.sh @@ -3,177 +3,121 @@ # Copyright 2014 QCRI (author: Ahmed Ali) # Apache 2.0 -. ./path.sh -. 
./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. - ## This relates to the queue. num_jobs=120 num_decode_jobs=40 +decode_gmm=true +stage=0 +overwrite=false -#NB: You can add whatever number of copora you like. The supported extensions -#NB: (formats) are wav and flac. Flac will be converted using sox and in contrast -#NB: with the old approach, the conversion will be on-the-fly and one-time-only -#NB: during the parametrization. - -#NB: Text corpora scpecification. We support either tgz files, which are unpacked -#NB: or just plain (already unpacked) directories. The list of transcript is then -#NB: obtained using find command - -#Make sure you edit this section to reflect whers you keep the LDC data on your cluster - -#This is CLSP configuration. We add the 2014 GALE data. We got around 2 % -#improvement just by including it. The gain might be large if someone would tweak -# the number of leaves and states and so on. - -#audio=( -# /export/corpora/LDC/LDC2013S02/ -# /export/corpora/LDC/LDC2013S07/ -# /export/corpora/LDC/LDC2014S07/ -#) -#text=( -# /export/corpora/LDC/LDC2013T17 -# /export/corpora/LDC/LDC2013T04 -# /export/corpora/LDC/LDC2014T17 -#) - -audio=( - /data/sls/scratch/amali/data/GALE/LDC2013S02 - /data/sls/scratch/amali/data/GALE/LDC2013S07 - /data/sls/scratch/amali/data/GALE/LDC2014S07 -) -text=( - /data/sls/scratch/amali/data/GALE/LDC2013T17.tgz - /data/sls/scratch/amali/data/GALE/LDC2013T04.tgz - /data/sls/scratch/amali/data/GALE/LDC2014T17.tgz -) +dir1=/export/corpora/LDC/LDC2013S02/ +dir2=/export/corpora/LDC/LDC2013S07/ +dir3=/export/corpora/LDC/LDC2014S07/ +text1=/export/corpora/LDC/LDC2013T17/ +text2=/export/corpora/LDC/LDC2013T04/ +text3=/export/corpora/LDC/LDC2014T17/ galeData=GALE -#prepare the data -#split train dev test -#prepare lexicon and LM - -# You can run the script from here automatically, but it is recommended to run the data preparation, -# and features extraction manually and and only once. -# By copying and pasting into your shell. - -#copy the audio files to local folder wav and convet flac files to wav -local/gale_data_prep_audio.sh "${audio[@]}" $galeData || exit 1; +. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. + ## This relates to the queue. +. ./path.sh +. ./utils/parse_options.sh # e.g. this parses the above options + # if supplied. -#get the transcription and remove empty prompts and all noise markers -local/gale_data_prep_txt.sh "${text[@]}" $galeData || exit 1; +if [ $stage -le 0 ]; then -# split the data to reports and conversational and for each class will have rain/dev and test -local/gale_data_prep_split.sh $galeData || exit 1; + if [ -f data/train/text ] && ! $overwrite; then + echo "$0: Not processing, probably script have run from wrong stage" + echo "Exiting with status 1 to avoid data corruption" + exit 1; + fi -# get all Arabic grapheme dictionaries and add silence and UNK -local/gale_prep_grapheme_dict.sh || exit 1; + echo "$0: Preparing data..." + local/prepare_data.sh --dir1 $dir1 --dir2 $dir2 --dir3 $dir3 \ + --text1 $text1 --text2 $text2 --text3 $text3 + echo "$0: Preparing lexicon and LM..." 
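+  # prepare_dict.sh builds a grapheme lexicon in data/local/dict from the QCRI
+  # word lists plus the words in the training transcripts; prepare_lm.sh then
+  # trains a 3-gram LM on the training text with SRILM.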
+ local/prepare_dict.sh -#prepare the langauge resources -utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang || exit 1; + utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang -# LM training -local/gale_train_lms.sh || exit 1; + local/prepare_lm.sh -local/gale_format_data.sh || exit 1; -# G compilation, check LG composition + utils/format_lm.sh data/lang data/local/lm/lm.gz \ + data/local/dict/lexicon.txt data/lang_test +fi -# Now make MFCC features. -# mfccdir should be some place with a largish disk where you -# want to store MFCC features. mfccdir=mfcc - -for x in train test ; do - steps/make_mfcc.sh --cmd "$train_cmd" --nj $num_jobs \ - data/$x exp/make_mfcc/$x $mfccdir - utils/fix_data_dir.sh data/$x # some files fail to get mfcc for many reasons - steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir -done - - -# Here we start the AM - -# Let's create a subset with 10k segments to make quick flat-start training: -utils/subset_data_dir.sh data/train 10000 data/train.10K || exit 1; - -# Train monophone models on a subset of the data, 10K segment -# Note: the --boost-silence option should probably be omitted by default -steps/train_mono.sh --nj 40 --cmd "$train_cmd" \ - data/train.10K data/lang exp/mono || exit 1; - - -# Get alignments from monophone system. -steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \ - data/train data/lang exp/mono exp/mono_ali || exit 1; - -# train tri1 [first triphone pass] -steps/train_deltas.sh --cmd "$train_cmd" \ - 2500 30000 data/train data/lang exp/mono_ali exp/tri1 || exit 1; - -# First triphone decoding -utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph -steps/decode.sh --nj $num_decode_jobs --cmd "$decode_cmd" \ - exp/tri1/graph data/test exp/tri1/decode - -steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \ - data/train data/lang exp/tri1 exp/tri1_ali || exit 1; - -# Train tri2a, which is deltas+delta+deltas -steps/train_deltas.sh --cmd "$train_cmd" \ - 3000 40000 data/train data/lang exp/tri1_ali exp/tri2a || exit 1; - -# tri2a decoding -utils/mkgraph.sh data/lang_test exp/tri2a exp/tri2a/graph -steps/decode.sh --nj $num_decode_jobs --cmd "$decode_cmd" \ - exp/tri2a/graph data/test exp/tri2a/decode - -# train and decode tri2b [LDA+MLLT] -steps/train_lda_mllt.sh --cmd "$train_cmd" 4000 50000 \ - data/train data/lang exp/tri1_ali exp/tri2b || exit 1; - -utils/mkgraph.sh data/lang_test exp/tri2b exp/tri2b/graph -steps/decode.sh --nj $num_decode_jobs --cmd "$decode_cmd" \ - exp/tri2b/graph data/test exp/tri2b/decode - -# Align all data with LDA+MLLT system (tri2b) -steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \ - --use-graphs true data/train data/lang exp/tri2b exp/tri2b_ali || exit 1; - - -# From 2b system, train 3b which is LDA + MLLT + SAT. -steps/train_sat.sh --cmd "$train_cmd" \ - 5000 100000 data/train data/lang exp/tri2b_ali exp/tri3b || exit 1; - -utils/mkgraph.sh data/lang_test exp/tri3b exp/tri3b/graph -steps/decode_fmllr.sh --nj $num_decode_jobs --cmd \ - "$decode_cmd" exp/tri3b/graph data/test exp/tri3b/decode - -# From 3b system, align all data. 
-steps/align_fmllr.sh --nj $num_jobs --cmd "$train_cmd" \ - data/train data/lang exp/tri3b exp/tri3b_ali || exit 1; - - -# nnet3 cross-entropy -local/nnet3/run_tdnn.sh #tdnn recipe: -local/nnet3/run_lstm.sh --stage 12 #lstm recipe (we skip ivector training) - -# chain lattice-free -local/chain/run_tdnn.sh #tdnn recipe: -local/chain/run_tdnn_lstm.sh #tdnn-lstm recipe: - -time=$(date +"%Y-%m-%d-%H-%M-%S") - -#get detailed WER; reports, conversational and combined -local/split_wer.sh $galeData > RESULTS.details.$USER.$time # to make sure you keep the results timed and owned - -echo training succedded +if [ $stage -le 1 ]; then + echo "$0: Preparing the test and train feature files..." + for x in train test ; do + steps/make_mfcc.sh --cmd "$train_cmd" --nj $num_jobs \ + data/$x exp/make_mfcc/$x $mfccdir + utils/fix_data_dir.sh data/$x # some files fail to get mfcc for many reasons + steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir + done +fi + +if [ $stage -le 2 ]; then + echo "$0: creating sub-set and training monophone system" + utils/subset_data_dir.sh data/train 10000 data/train.10K || exit 1; + + steps/train_mono.sh --nj 40 --cmd "$train_cmd" \ + data/train.10K data/lang exp/mono || exit 1; +fi + +if [ $stage -le 3 ]; then + echo "$0: Aligning data using monophone system" + steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \ + data/train data/lang exp/mono exp/mono_ali || exit 1; + + echo "$0: training triphone system with delta features" + steps/train_deltas.sh --cmd "$train_cmd" \ + 2500 30000 data/train data/lang exp/mono_ali exp/tri1 || exit 1; +fi + +if [ $stage -le 4 ] && $decode_gmm; then + utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph + steps/decode.sh --nj $num_decode_jobs --cmd "$decode_cmd" \ + exp/tri1/graph data/test exp/tri1/decode +fi + +if [ $stage -le 5 ]; then + echo "$0: Aligning data and retraining and realigning with lda_mllt" + steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \ + data/train data/lang exp/tri1 exp/tri1_ali || exit 1; + + steps/train_lda_mllt.sh --cmd "$train_cmd" 4000 50000 \ + data/train data/lang exp/tri1_ali exp/tri2b || exit 1; +fi + +if [ $stage -le 6 ] && $decode_gmm; then + utils/mkgraph.sh data/lang_test exp/tri2b exp/tri2b/graph + steps/decode.sh --nj $num_decode_jobs --cmd "$decode_cmd" \ + exp/tri2b/graph data/test exp/tri2b/decode +fi + +if [ $stage -le 7 ]; then + echo "$0: Aligning data and retraining and realigning with sat_basis" + steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \ + data/train data/lang exp/tri2b exp/tri2b_ali || exit 1; + + steps/train_sat_basis.sh --cmd "$train_cmd" \ + 5000 100000 data/train data/lang exp/tri2b_ali exp/tri3b || exit 1; + + steps/align_fmllr.sh --nj $num_jobs --cmd "$train_cmd" \ + data/train data/lang exp/tri3b exp/tri3b_ali || exit 1; +fi + +if [ $stage -le 8 ] && $decode_gmm; then + utils/mkgraph.sh data/lang_test exp/tri3b exp/tri3b/graph + steps/decode_fmllr.sh --nj $num_decode_jobs --cmd \ + "$decode_cmd" exp/tri3b/graph data/test exp/tri3b/decode +fi + +if [ $stage -le 9 ]; then + echo "$0: Training a regular chain model using the e2e alignments..." 
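+  # local/chain/run_tdnn.sh is expected to point at tuning/run_tdnn_1a.sh, which
+  # builds lang_chain, the lattices and the tree via run_chain_common.sh and
+  # then trains a TDNN-F chain model on top of the tri3b alignments.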
+ local/chain/run_tdnn.sh +fi + +echo "$0: training succedded" exit 0 - -#TODO: -#LM (4-gram and RNN) rescoring -#combine lattices -#dialect detection - - - - - From 4338004dd474bdc0040ef351b44a2a6ff42c1fb0 Mon Sep 17 00:00:00 2001 From: Ashish Arora Date: Tue, 22 Jan 2019 16:10:24 -0500 Subject: [PATCH 005/235] [egs] Remove outdated NN results from Gale Arabic recipe (#3002) --- egs/gale_arabic/s5b/RESULTS | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/egs/gale_arabic/s5b/RESULTS b/egs/gale_arabic/s5b/RESULTS index e0fb9d38ceb..a485240ff6b 100644 --- a/egs/gale_arabic/s5b/RESULTS +++ b/egs/gale_arabic/s5b/RESULTS @@ -2,13 +2,7 @@ # This file is generated using local/split_wer.sh $galeData //galeData is a local folder to keep intermediate gale data # look at the end of run.sh in the same folder ## -##### RESULTS generated by amali at 2017-01-01-08-05-59 - Report Results WER: -%WER 9.50 [ 2124 / 22363, 160 ins, 275 del, 1689 sub ] exp/chain_cleaned/tdnn_lstm1a_sp_bi/decode/wer_report_9 -%WER 10.72 [ 2398 / 22363, 163 ins, 313 del, 1922 sub ] exp/chain_cleaned/tdnn1b_sp_bi/decode/wer_report_9 -%WER 12.04 [ 2693 / 22363, 226 ins, 271 del, 2196 sub ] exp/nnet3_cleaned/lstm_ld5_sp/decode/wer_report_9 -%WER 12.29 [ 2749 / 22363, 273 ins, 266 del, 2210 sub ] exp/nnet3_cleaned/tdnn_sp/decode/wer_report_10 %WER 17.82 [ 3986 / 22363, 315 ins, 618 del, 3053 sub ] exp/sgmm_5a_mmi_b0.1/decode/wer_report_12 %WER 18.15 [ 4059 / 22363, 335 ins, 589 del, 3135 sub ] exp/sgmm_5a_mmi_b0.1/decode4/wer_report_11 %WER 18.42 [ 4119 / 22363, 346 ins, 590 del, 3183 sub ] exp/sgmm_5a_mmi_b0.1/decode3/wer_report_11 @@ -27,10 +21,6 @@ Report Results WER: %WER 25.66 [ 5738 / 22363, 478 ins, 838 del, 4422 sub ] exp/tri2a/decode/wer_report_14 %WER 26.38 [ 5900 / 22363, 435 ins, 929 del, 4536 sub ] exp/tri1/decode/wer_report_15 Conversational Results WER: -%WER 21.59 [ 10213 / 47305, 944 ins, 3092 del, 6177 sub ] exp/chain_cleaned/tdnn_lstm1a_sp_bi/decode/wer_conversational_9 -%WER 24.77 [ 11716 / 47305, 1098 ins, 3579 del, 7039 sub ] exp/chain_cleaned/tdnn1b_sp_bi/decode/wer_conversational_9 -%WER 26.78 [ 12670 / 47305, 1741 ins, 2434 del, 8495 sub ] exp/nnet3_cleaned/lstm_ld5_sp/decode/wer_conversational_9 -%WER 27.55 [ 13032 / 47305, 1800 ins, 2666 del, 8566 sub ] exp/nnet3_cleaned/tdnn_sp/decode/wer_conversational_11 %WER 34.10 [ 16133 / 47305, 1903 ins, 3245 del, 10985 sub ] exp/sgmm_5a_mmi_b0.1/decode/wer_conversational_11 %WER 34.81 [ 16466 / 47305, 2077 ins, 3037 del, 11352 sub ] exp/sgmm_5a_mmi_b0.1/decode4/wer_conversational_10 %WER 35.19 [ 16648 / 47305, 1933 ins, 3264 del, 11451 sub ] exp/sgmm_5a_mmi_b0.1/decode3/wer_conversational_11 @@ -49,10 +39,6 @@ Conversational Results WER: %WER 45.92 [ 21724 / 47305, 1995 ins, 5213 del, 14516 sub ] exp/tri2a/decode/wer_conversational_14 %WER 46.86 [ 22166 / 47305, 2212 ins, 4819 del, 15135 sub ] exp/tri1/decode/wer_conversational_13 Combined Results for Reports and Conversational WER: -%WER 17.64 [ 12286 / 69668, 1310 ins, 2807 del, 8169 sub ] exp/chain_cleaned/tdnn_lstm1a_sp_bi/decode/wer_8 -%WER 20.26 [ 14114 / 69668, 1261 ins, 3892 del, 8961 sub ] exp/chain_cleaned/tdnn1b_sp_bi/decode/wer_9 -%WER 22.05 [ 15363 / 69668, 1967 ins, 2705 del, 10691 sub ] exp/nnet3_cleaned/lstm_ld5_sp/decode/wer_9 -%WER 22.66 [ 15786 / 69668, 2047 ins, 2955 del, 10784 sub ] exp/nnet3_cleaned/tdnn_sp/decode/wer_11 %WER 28.89 [ 20127 / 69668, 2244 ins, 3829 del, 14054 sub ] exp/sgmm_5a_mmi_b0.1/decode/wer_11 %WER 29.48 [ 20541 / 69668, 2243 ins, 3860 del, 14438 sub ] 
exp/sgmm_5a_mmi_b0.1/decode4/wer_11 %WER 29.81 [ 20767 / 69668, 2279 ins, 3854 del, 14634 sub ] exp/sgmm_5a_mmi_b0.1/decode3/wer_11 From 05d9a3d5ed6b35b37c4b85e95f3058f25bf18401 Mon Sep 17 00:00:00 2001 From: huangruizhe Date: Tue, 22 Jan 2019 21:24:54 -0500 Subject: [PATCH 006/235] [egs] Add RESULTS file for the tedlium s5_r3 (release 3) setup (#3003) --- egs/tedlium/s5_r3/RESULTS | 32 +++++++++++++++++++ egs/tedlium/s5_r3/local/ted_download_rnnlm.sh | 2 +- egs/tedlium/s5_r3/results.sh | 21 ++++++++++-- 3 files changed, 51 insertions(+), 4 deletions(-) create mode 100644 egs/tedlium/s5_r3/RESULTS diff --git a/egs/tedlium/s5_r3/RESULTS b/egs/tedlium/s5_r3/RESULTS new file mode 100644 index 00000000000..b2f9526a8fd --- /dev/null +++ b/egs/tedlium/s5_r3/RESULTS @@ -0,0 +1,32 @@ +# This RESULTS file was obtained by running ./run.sh and then ./result.sh + +%WER 28.32 [ 5037 / 17783, 615 ins, 1171 del, 3251 sub ] exp/tri1/decode_nosp_dev/wer_10 +%WER 26.99 [ 4799 / 17783, 603 ins, 1169 del, 3027 sub ] exp/tri1/decode_nosp_dev_rescore/wer_10 +%WER 27.76 [ 7634 / 27500, 776 ins, 1689 del, 5169 sub ] exp/tri1/decode_nosp_test/wer_11 +%WER 26.52 [ 7292 / 27500, 766 ins, 1611 del, 4915 sub ] exp/tri1/decode_nosp_test_rescore/wer_11 +%WER 23.38 [ 4158 / 17783, 603 ins, 953 del, 2602 sub ] exp/tri2/decode_dev/wer_14 +%WER 21.98 [ 3909 / 17783, 597 ins, 910 del, 2402 sub ] exp/tri2/decode_dev_rescore/wer_14 +%WER 24.12 [ 4289 / 17783, 600 ins, 1014 del, 2675 sub ] exp/tri2/decode_nosp_dev/wer_12 +%WER 22.96 [ 4083 / 17783, 631 ins, 931 del, 2521 sub ] exp/tri2/decode_nosp_dev_rescore/wer_11 +%WER 23.30 [ 6408 / 27500, 727 ins, 1375 del, 4306 sub ] exp/tri2/decode_nosp_test/wer_13 +%WER 22.10 [ 6078 / 27500, 746 ins, 1281 del, 4051 sub ] exp/tri2/decode_nosp_test_rescore/wer_12 +%WER 22.31 [ 6134 / 27500, 794 ins, 1148 del, 4192 sub ] exp/tri2/decode_test/wer_13 +%WER 21.06 [ 5791 / 27500, 737 ins, 1147 del, 3907 sub ] exp/tri2/decode_test_rescore/wer_14 +%WER 19.99 [ 3554 / 17783, 570 ins, 816 del, 2168 sub ] exp/tri3_cleaned/decode_dev/wer_16 +%WER 18.92 [ 3364 / 17783, 588 ins, 791 del, 1985 sub ] exp/tri3_cleaned/decode_dev_rescore/wer_15 +%WER 23.85 [ 4241 / 17783, 686 ins, 874 del, 2681 sub ] exp/tri3_cleaned/decode_dev.si/wer_13 +%WER 17.73 [ 4876 / 27500, 700 ins, 935 del, 3241 sub ] exp/tri3_cleaned/decode_test/wer_16 +%WER 16.72 [ 4599 / 27500, 686 ins, 906 del, 3007 sub ] exp/tri3_cleaned/decode_test_rescore/wer_16 +%WER 22.10 [ 6077 / 27500, 864 ins, 1075 del, 4138 sub ] exp/tri3_cleaned/decode_test.si/wer_13 +%WER 19.63 [ 3490 / 17783, 585 ins, 809 del, 2096 sub ] exp/tri3/decode_dev/wer_15 +%WER 18.56 [ 3300 / 17783, 558 ins, 817 del, 1925 sub ] exp/tri3/decode_dev_rescore/wer_16 +%WER 23.75 [ 4224 / 17783, 661 ins, 917 del, 2646 sub ] exp/tri3/decode_dev.si/wer_14 +%WER 17.92 [ 4928 / 27500, 730 ins, 921 del, 3277 sub ] exp/tri3/decode_test/wer_14 +%WER 16.80 [ 4621 / 27500, 650 ins, 973 del, 2998 sub ] exp/tri3/decode_test_rescore/wer_17 +%WER 22.16 [ 6095 / 27500, 849 ins, 1070 del, 4176 sub ] exp/tri3/decode_test.si/wer_13 +%WER 8.17 [ 1453 / 17783, 242 ins, 310 del, 901 sub ] exp/chain_cleaned/tdnnf_1a/decode_dev/wer_9 +%WER 7.61 [ 1354 / 17783, 236 ins, 300 del, 818 sub ] exp/chain_cleaned/tdnnf_1a/decode_dev_rescore/wer_9 +%WER 6.17 [ 1097 / 17783, 207 ins, 292 del, 598 sub ] exp/chain_cleaned/tdnnf_1a/decode_dev_rnnlm_lstm_tdnn_a_averaged/wer_10 +%WER 8.16 [ 2245 / 27500, 288 ins, 605 del, 1352 sub ] exp/chain_cleaned/tdnnf_1a/decode_test/wer_9 +%WER 7.75 [ 2131 / 27500, 264 ins, 
643 del, 1224 sub ] exp/chain_cleaned/tdnnf_1a/decode_test_rescore/wer_10 +%WER 6.84 [ 1880 / 27500, 283 ins, 533 del, 1064 sub ] exp/chain_cleaned/tdnnf_1a/decode_test_rnnlm_lstm_tdnn_a_averaged/wer_8 diff --git a/egs/tedlium/s5_r3/local/ted_download_rnnlm.sh b/egs/tedlium/s5_r3/local/ted_download_rnnlm.sh index 431d44c6ff6..6cbcaaa85ee 100755 --- a/egs/tedlium/s5_r3/local/ted_download_rnnlm.sh +++ b/egs/tedlium/s5_r3/local/ted_download_rnnlm.sh @@ -14,7 +14,7 @@ wget --continue http://kaldi-asr.org/models/5/tedlium_rnnlm.tgz -P exp/rnnlm_lst cd exp/rnnlm_lstm_tdnn_a_averaged tar -xvzf tedlium_rnnlm.tgz || exit 1 rm tedlium_rnnlm.tgz -mkdir config +mkdir -p config cd ../.. cp data/lang/words.txt exp/rnnlm_lstm_tdnn_a_averaged/config/words.txt echo " 152217" >> exp/rnnlm_lstm_tdnn_a_averaged/config/words.txt diff --git a/egs/tedlium/s5_r3/results.sh b/egs/tedlium/s5_r3/results.sh index 98bcab94ec5..3e318cb4bc7 100755 --- a/egs/tedlium/s5_r3/results.sh +++ b/egs/tedlium/s5_r3/results.sh @@ -1,10 +1,25 @@ #!/bin/bash +# The output of this script (after successfully running ./run.sh) can be found in the RESULTS file. + filter_regexp=. [ $# -ge 1 ] && filter_regexp=$1 -for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done 2>/dev/null - for x in exp/{mono,tri,sgmm,nnet,dnn,lstm,chain}*/decode*; do [ -d $x ] && grep Sum $x/score_*/*.sys | utils/best_wer.sh; done 2>/dev/null | grep $filter_regexp - for x in exp/{mono,tri,sgmm,nnet,dnn,lstm,chain}*/*/decode*; do [ -d $x ] && grep Sum $x/score_*/*.sys | utils/best_wer.sh; done 2>/dev/null | grep $filter_regexp +for x in exp/*/decode*; do + [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; +done 2>/dev/null + +for x in exp/{mono,tri,sgmm,nnet,dnn,lstm,chain}*/decode*; do + [ -d $x ] && grep Sum $x/score_*/*.sys | utils/best_wer.sh; +done 2>/dev/null | grep $filter_regexp + +for x in exp/{mono,tri,sgmm,nnet,dnn,lstm,chain}*/*/decode*; do + [ -d $x ] && grep Sum $x/score_*/*.sys | utils/best_wer.sh; +done 2>/dev/null | grep $filter_regexp + +for x in exp/{mono,tri,sgmm,nnet,dnn,lstm,chain}*/*/decode*; do + [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; +done 2>/dev/null | grep $filter_regexp + exit 0 From 1dcdf80c587190de9ae189a8c2bfd071e98ca9c9 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sat, 26 Jan 2019 13:54:56 -0500 Subject: [PATCH 007/235] [src] Fixes to grammar-fst code to handle LM-disambig symbols properly (#3000) thanks: armando.muscariello@gmail.com --- src/decoder/grammar-fst.cc | 124 ++++++++++++++++++++++++++++++++++++- src/decoder/grammar-fst.h | 17 ++--- src/doc/grammar.dox | 2 +- 3 files changed, 132 insertions(+), 11 deletions(-) diff --git a/src/decoder/grammar-fst.cc b/src/decoder/grammar-fst.cc index 6f95993d078..27d8c9998ea 100644 --- a/src/decoder/grammar-fst.cc +++ b/src/decoder/grammar-fst.cc @@ -443,6 +443,98 @@ void GrammarFst::Read(std::istream &is, bool binary) { } +/** + This utility function input-determinizes a specified state s of the FST + 'fst'. (This input-determinizes while treating epsilon as a real symbol, + although for the application we expect to use it, there won't be epsilons). 
+
+   What this function does is: for any symbol i that appears as the ilabel of
+   more than one arc leaving state s of FST 'fst', it creates a new state t
+   with epsilon-input transitions leaving it for each of those multiple arcs
+   leaving state s; it deletes the original arcs leaving state s; and it
+   creates a single arc leaving state s to the newly created state with the
+   ilabel i on it.  It sets the weights as necessary to
+   preserve equivalence and also to ensure that if, prior to this modification,
+   the FST was stochastic when cast to the log semiring (see
+   IsStochasticInLog()), it still will be.  I.e. when interpreted as
+   negative logprobs, the weight from state s to t would be the sum of
+   the weights on the original arcs leaving state s.
+
+   This is used as a very cheap solution when preparing FSTs for the grammar
+   decoder, to ensure that there is only one entry-state to the sub-FST for each
+   phonetic left-context; this keeps the grammar-FST code (i.e. the code that
+   stitches them together) simple.  Of course it will tend to introduce
+   unnecessary epsilons, and if we were careful we might be able to remove
+   some of those, but this wouldn't have a substantial impact on overall
+   decoder performance so we don't bother.
+ */
+static void InputDeterminizeSingleState(StdArc::StateId s,
+                                        VectorFst<StdArc> *fst) {
+  bool was_input_deterministic = true;
+  typedef StdArc Arc;
+  typedef Arc::StateId StateId;
+  typedef Arc::Label Label;
+  typedef Arc::Weight Weight;
+
+  struct InfoForIlabel {
+    std::vector<int32> arc_indexes;  // indexes of all arcs with this ilabel
+    float tot_cost;  // total cost of all arcs leaving state s for this
+                     // ilabel, summed as if they were negative log-probs.
+    StateId new_state;  // state-id of new state, if any, that we have created
+                        // to remove duplicate symbols with this ilabel.
+    InfoForIlabel(): new_state(-1) { }
+  };
+
+  std::unordered_map<Label, InfoForIlabel> label_map;
+
+  size_t arc_index = 0;
+  for (ArcIterator<VectorFst<Arc> > aiter(*fst, s);
+       !aiter.Done(); aiter.Next(), ++arc_index) {
+    const Arc &arc = aiter.Value();
+    InfoForIlabel &info = label_map[arc.ilabel];
+    if (info.arc_indexes.empty()) {
+      info.tot_cost = arc.weight.Value();
+    } else {
+      info.tot_cost = -kaldi::LogAdd(-info.tot_cost, -arc.weight.Value());
+      was_input_deterministic = false;
+    }
+    info.arc_indexes.push_back(arc_index);
+  }
+
+  if (was_input_deterministic)
+    return;  // Nothing to do.
+
+  // 'new_arcs' will contain the modified list of arcs
+  // leaving state s
+  std::vector<Arc> new_arcs;
+  new_arcs.reserve(arc_index);
+  arc_index = 0;
+  for (ArcIterator<VectorFst<Arc> > aiter(*fst, s);
+       !aiter.Done(); aiter.Next(), ++arc_index) {
+    const Arc &arc = aiter.Value();
+    Label ilabel = arc.ilabel;
+    InfoForIlabel &info = label_map[ilabel];
+    if (info.arc_indexes.size() == 1) {
+      new_arcs.push_back(arc);  // no changes needed
+    } else {
+      if (info.new_state < 0) {
+        info.new_state = fst->AddState();
+        // add arc from state 's' to newly created state.
+        new_arcs.push_back(Arc(ilabel, 0, Weight(info.tot_cost),
+                               info.new_state));
+      }
+      // add arc from new state to original destination of this arc.
+      fst->AddArc(info.new_state, Arc(0, arc.olabel,
+                                      Weight(arc.weight.Value() - info.tot_cost),
+                                      arc.nextstate));
+    }
+  }
+  fst->DeleteArcs(s);
+  for (size_t i = 0; i < new_arcs.size(); i++)
+    fst->AddArc(s, new_arcs[i]);
+}
+
+
 // This class contains the implementation of the function
 // PrepareForGrammarFst(), which is declared in grammar-fst.h.
 class GrammarFstPreparer {
@@ -475,6 +567,12 @@ class GrammarFstPreparer {
         // OK, state s is a special state.
         FixArcsToFinalStates(s);
         MaybeAddFinalProbToState(s);
+        // The following ensures that the start-state of sub-FSTs only has
+        // a single arc per left-context phone (the graph-building recipe can
+        // end up creating more than one if there were disambiguation symbols,
+        // e.g. for language model backoff).
+        if (s == fst_->Start() && IsEntryState(s))
+          InputDeterminizeSingleState(s, fst_);
       }
     }
   }
@@ -487,7 +585,7 @@
 
   // Returns true if state 's' has at least one arc coming out of it with a
   // special nonterminal-related ilabel on it (i.e. an ilabel >=
-  // kNontermBigNumber)
+  // kNontermBigNumber), and false otherwise.
   bool IsSpecialState(StateId s) const;
 
   // This function verifies that state s does not currently have any
@@ -509,6 +607,10 @@
   // modify this state (by adding input-epsilon arcs), and false otherwise.
   bool NeedEpsilons(StateId s) const;
 
+  // Returns true if state s (which is expected to be the start state, although we
+  // don't check this) has arcs with nonterminal symbols #nonterm_begin.
+  bool IsEntryState(StateId s) const;
+
   // Fixes any final-prob-related problems with this state.  The problem we aim
   // to fix is that there may be arcs with nonterminal symbol #nonterm_end which
   // transition from this state to a state with non-unit final prob.  This
@@ -599,6 +701,24 @@ bool GrammarFstPreparer::IsSpecialState(StateId s) const {
   return false;
 }
 
+bool GrammarFstPreparer::IsEntryState(StateId s) const {
+  int32 big_number = kNontermBigNumber,
+      encoding_multiple = GetEncodingMultiple(nonterm_phones_offset_);
+
+  for (ArcIterator<FST> aiter(*fst_, s ); !aiter.Done(); aiter.Next()) {
+    const Arc &arc = aiter.Value();
+    int32 nonterminal = (arc.ilabel - big_number) /
+        encoding_multiple;
+    // we check that at least one has label with nonterminal equal to #nonterm_begin...
+    // in fact they will all have this value if at least one does, and this was checked
+    // in NeedEpsilons().
+    if (nonterminal == kNontermBegin)
+      return true;
+  }
+  return false;
+}
+
+
 bool GrammarFstPreparer::NeedEpsilons(StateId s) const {
 
   // See the documentation for GetCategoryOfArc() for explanation of what these are.
@@ -647,7 +767,7 @@ bool GrammarFstPreparer::NeedEpsilons(StateId s) const {
       if (nonterminal == GetPhoneSymbolFor(kNontermBegin) &&
           s != fst_->Start()) {
         KALDI_ERR << "#nonterm_begin symbol is present but this is not the "
-            "first arc.  Did you do fstdeterminizestar while compiling?";
+            "first state.  Did you do fstdeterminizestar while compiling?";
       }
       if (nonterminal == GetPhoneSymbolFor(kNontermEnd)) {
         if (fst_->NumArcs(arc.nextstate) != 0 ||
diff --git a/src/decoder/grammar-fst.h b/src/decoder/grammar-fst.h
index f66933c132d..b82d7b3bc9f 100644
--- a/src/decoder/grammar-fst.h
+++ b/src/decoder/grammar-fst.h
@@ -229,14 +229,15 @@ class GrammarFst {
      an arc-index leaving a particular state in an FST (i.e. an index that
      we could use to Seek() to the matching arc).
 
-       @param [in] fst  The FST we are looking for state-indexes for
-       @param [in] entry_state  The state in the FST-- must have arcs with
-                      ilabels decodable as (nonterminal_symbol, left_context_phone).
-                      Will either be the start state (if 'nonterminal_symbol'
-                      corresponds to #nonterm_begin), or an internal state
-                      (if 'nonterminal_symbol' corresponds to #nonterm_reenter).
- The arc-indexes of those arcs will be the values - we set in 'phone_to_arc' + @param [in] fst The FST that is being entered (or reentered) + @param [in] entry_state The state in 'fst' which is being entered + (or reentered); will be fst.Start() if it's being + entered. It must have arcs with ilabels decodable as + (nonterminal_symbol, left_context_phone). Will either be the + start state (if 'nonterminal_symbol' corresponds to + #nonterm_begin), or an internal state (if 'nonterminal_symbol' + corresponds to #nonterm_reenter). The arc-indexes of those + arcs will be the values we set in 'phone_to_arc' @param [in] nonterminal_symbol The index in phones.txt of the nonterminal symbol we expect to be encoded in the ilabels of the arcs leaving 'entry_state'. Will either correspond diff --git a/src/doc/grammar.dox b/src/doc/grammar.dox index d1c6f51f349..30396041d22 100644 --- a/src/doc/grammar.dox +++ b/src/doc/grammar.dox @@ -352,7 +352,7 @@ Z_S 243 The special symbols in CLG.fst will be as follows. The following special symbols may appear in any CLG graph, top-level or not: - - When any graph invokes a sub-graph, there will be n arc with an ilabel + - When any graph invokes a sub-graph, there will be an arc with an ilabel (\#nonterm:foo, left-context-phone) representing the user-specified nonterminal and the actual left-context, which will be followed by arcs with ilabels of the form (\#nonterm_reenter, From 6f565123183f6b052f4b05b64e4ba1137ff71ccf Mon Sep 17 00:00:00 2001 From: Christoph Boeddeker Date: Wed, 30 Jan 2019 20:38:20 +0100 Subject: [PATCH 008/235] [src] Cosmetic change to mel computation (fix option string) (#3011) --- src/feat/mel-computations.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/feat/mel-computations.h b/src/feat/mel-computations.h index 5df36c8cb90..7053da54f3a 100644 --- a/src/feat/mel-computations.h +++ b/src/feat/mel-computations.h @@ -63,7 +63,7 @@ struct MelBanksOptions { opts->Register("low-freq", &low_freq, "Low cutoff frequency for mel bins"); opts->Register("high-freq", &high_freq, - "High cutoff frequency for mel bins (if < 0, offset from Nyquist)"); + "High cutoff frequency for mel bins (if <= 0, offset from Nyquist)"); opts->Register("vtln-low", &vtln_low, "Low inflection point in piecewise linear VTLN warping function"); opts->Register("vtln-high", &vtln_high, From 56cfb95d094acda857a3e402bd6e37078a859cc7 Mon Sep 17 00:00:00 2001 From: David Zurow Date: Fri, 1 Feb 2019 13:31:35 -0500 Subject: [PATCH 009/235] [src] Fix Visual Studio error due to alternate syntactic form of noreturn (#3018) --- src/base/kaldi-error.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/base/kaldi-error.cc b/src/base/kaldi-error.cc index 3eeebe01910..e1aec2e59db 100644 --- a/src/base/kaldi-error.cc +++ b/src/base/kaldi-error.cc @@ -221,7 +221,7 @@ FatalMessageLogger::FatalMessageLogger(LogMessageEnvelope::Severity severity, } } -[[ noreturn ]] FatalMessageLogger::~FatalMessageLogger() noexcept(false) { +FatalMessageLogger::~FatalMessageLogger [[ noreturn ]] () noexcept(false) { std::string str = GetMessage(); // print the mesage (or send to logging handler), From 9e358985ee8f934cf5228e7b88a8a695ccb3456b Mon Sep 17 00:00:00 2001 From: Caley Baek Date: Sat, 2 Feb 2019 08:16:22 +0900 Subject: [PATCH 010/235] [egs] Fix location of sequitur installation (#3017) --- egs/voxforge/s5/local/voxforge_prepare_dict.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/voxforge/s5/local/voxforge_prepare_dict.sh 
b/egs/voxforge/s5/local/voxforge_prepare_dict.sh index 4242af29d25..daf4e2326e5 100755 --- a/egs/voxforge/s5/local/voxforge_prepare_dict.sh +++ b/egs/voxforge/s5/local/voxforge_prepare_dict.sh @@ -49,7 +49,7 @@ if [[ "$(uname)" == "Darwin" ]]; then alias readlink=greadlink fi -sequitur=$KALDI_ROOT/tools/sequitur +sequitur=$KALDI_ROOT/tools/sequitur-g2p export PATH=$PATH:$sequitur/bin export PYTHONPATH=$PYTHONPATH:`utils/make_absolute.sh $sequitur/lib/python*/site-packages` From a51bd964b7919d631ee84e5d5a58076b666d5df2 Mon Sep 17 00:00:00 2001 From: David Zurow Date: Sat, 2 Feb 2019 20:45:13 -0500 Subject: [PATCH 011/235] [src] Fix w/ ifdef Visual Studio error from alternate syntactic form noreturn (#3020) --- src/base/kaldi-error.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/base/kaldi-error.cc b/src/base/kaldi-error.cc index e1aec2e59db..df03e85f148 100644 --- a/src/base/kaldi-error.cc +++ b/src/base/kaldi-error.cc @@ -220,8 +220,12 @@ FatalMessageLogger::FatalMessageLogger(LogMessageEnvelope::Severity severity, "severities kAssertFailed and kError"); } } - -FatalMessageLogger::~FatalMessageLogger [[ noreturn ]] () noexcept(false) { +#if defined(_MSC_VER) +FatalMessageLogger::~FatalMessageLogger [[ noreturn ]] () noexcept(false) +#else +[[ noreturn ]] FatalMessageLogger::~FatalMessageLogger() noexcept(false) +#endif +{ std::string str = GetMessage(); // print the mesage (or send to logging handler), From 41ea8cfbc634168af7b57480d2e8500f28c2514c Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 3 Feb 2019 17:58:37 -0500 Subject: [PATCH 012/235] [egs] Some fixes to getting data in heroico recipe (#3021) --- egs/heroico/s5/cmd.sh | 1 + egs/heroico/s5/local/heroico_download.sh | 37 ----------------------- egs/heroico/s5/local/subs_prepare_data.pl | 2 +- egs/heroico/s5/run.sh | 22 +++++++++----- 4 files changed, 17 insertions(+), 45 deletions(-) delete mode 100755 egs/heroico/s5/local/heroico_download.sh diff --git a/egs/heroico/s5/cmd.sh b/egs/heroico/s5/cmd.sh index a427f3c16a5..533aad25db1 100755 --- a/egs/heroico/s5/cmd.sh +++ b/egs/heroico/s5/cmd.sh @@ -10,6 +10,7 @@ # conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, # or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. +export cmd="retry.pl queue.pl" export train_cmd="retry.pl queue.pl" export decode_cmd="retry.pl queue.pl --mem 2G" diff --git a/egs/heroico/s5/local/heroico_download.sh b/egs/heroico/s5/local/heroico_download.sh deleted file mode 100755 index 9c58fe37537..00000000000 --- a/egs/heroico/s5/local/heroico_download.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash - -# Copyright 2018 John Morgan -# Apache 2.0. - -speech=$1 -lexicon=$2 - -download_dir=$(pwd) -tmpdir=data/local/tmp -data_dir=$tmpdir/LDC2006S37/data - -mkdir -p $tmpdir - -# download the corpus from openslr - -if [ ! -f $download_dir/heroico.tar.gz ]; then - wget -O $download_dir/heroico.tar.gz $speech - - ( - cd $download_dir - tar -xzf heroico.tar.gz - ) -fi - -mkdir -p data/local/dict $tmpdir/dict - -# download the dictionary from openslr - -if [ ! 
-f $download_dir/santiago.tar.gz ]; then - wget -O $download_dir/santiago.tar.gz $lexicon -fi - -( - cd $download_dir - tar -xzf santiago.tar.gz -) diff --git a/egs/heroico/s5/local/subs_prepare_data.pl b/egs/heroico/s5/local/subs_prepare_data.pl index a7e0cfb0c6e..e39db79f610 100755 --- a/egs/heroico/s5/local/subs_prepare_data.pl +++ b/egs/heroico/s5/local/subs_prepare_data.pl @@ -19,7 +19,7 @@ # input and output files -my $corpus = "OpenSubtitles2018.en-es.es"; +my $corpus = "OpenSubtitles.en-es.es"; my $symbol_table = "data/lang/words.txt"; my $filtered = "data/local/tmp/subs/lm/es.txt"; my $oovs = "data/local/tmp/subs/lm/oovs.txt"; diff --git a/egs/heroico/s5/run.sh b/egs/heroico/s5/run.sh index 67ad87e55f9..4cc5617e985 100755 --- a/egs/heroico/s5/run.sh +++ b/egs/heroico/s5/run.sh @@ -9,11 +9,11 @@ stage=0 datadir=/export/corpora5/LDC/LDC2006S37 # The corpus and lexicon are on openslr.org -speech="http://www.openslr.org/resources/39/LDC2006S37.tar.gz" -lexicon="http://www.openslr.org/resources/34/santiago.tar.gz" +#speech_url="http://www.openslr.org/resources/39/LDC2006S37.tar.gz" +lexicon_url="http://www.openslr.org/resources/34/santiago.tar.gz" # Location of the Movie subtitles text corpus -subs_src="http://opus.lingfil.uu.se/download.php?f=OpenSubtitles2018/en-es.txt.zip" +subtitles_url="http://opus.lingfil.uu.se/download.php?f=OpenSubtitles2018/en-es.txt.zip" . utils/parse_options.sh @@ -26,14 +26,22 @@ set -u tmpdir=data/local/tmp if [ $stage -le 0 ]; then - # download the corpus from openslr - local/heroico_download.sh $speech $lexicon + if [ ! -d $datadir ]; then + echo "$0: please download and un-tar http://www.openslr.org/resources/39/LDC2006S37.tar.gz" + echo " and set $datadir to the directory where it is located." + exit 1 + fi + if [ ! -s santiago.txt ]; then + echo "$0: downloading the lexicon" + wget -c http://www.openslr.org/resources/34/santiago.tar.gz + tar -xvzf santiago.tar.gz + fi # Get data for lm training - local/subs_download.sh $subs_src + local/subs_download.sh $subtitles_url fi if [ $stage -le 1 ]; then - echo "Makin lists for building models." + echo "Making lists for building models." 
   local/prepare_data.sh $datadir
 fi
 
From fb514dc700301f0437299473e630a20bf0e4d652 Mon Sep 17 00:00:00 2001
From: "Jan \"yenda\" Trmal"
Date: Mon, 4 Feb 2019 13:41:17 -0500
Subject: [PATCH 013/235] [egs] BABEL script fix: avoid make_L_align.sh generating invalid files (#3022)

---
 .gitignore                                         |  1 +
 .../s5d/conf/lang/404-georgian.FLP.official.conf   |  4 ++--
 egs/babel/s5d/local/make_L_align.sh                | 14 ++++++++++----
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/.gitignore b/.gitignore
index 910d5cb019d..4cf0fa4efa9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -83,6 +83,7 @@ GSYMS
 /tools/ATLAS/
 /tools/atlas3.8.3.tar.gz
 /tools/irstlm/
+/tools/mitlm/
 /tools/openfst
 /tools/openfst-1.3.2.tar.gz
 /tools/openfst-1.3.2/
diff --git a/egs/babel/s5d/conf/lang/404-georgian.FLP.official.conf b/egs/babel/s5d/conf/lang/404-georgian.FLP.official.conf
index a6b22de419f..9cd043716ce 100644
--- a/egs/babel/s5d/conf/lang/404-georgian.FLP.official.conf
+++ b/egs/babel/s5d/conf/lang/404-georgian.FLP.official.conf
@@ -75,8 +75,8 @@ unsup_data_list=./conf/lists/404-georgian/untranscribed-training.list
 
 unsup_nj=32
 
-lexicon_file=
-lexiconFlags="--romanized --oov "
+lexicon_file=/export/corpora/LDC/LDC2016S12/IARPA_BABEL_OP3_404/conversational/reference_materials/lexicon.txt
+lexiconFlags=" --romanized --oov "
 
 
diff --git a/egs/babel/s5d/local/make_L_align.sh b/egs/babel/s5d/local/make_L_align.sh
index 50e46a00493..41e9ff32958 100755
--- a/egs/babel/s5d/local/make_L_align.sh
+++ b/egs/babel/s5d/local/make_L_align.sh
@@ -34,18 +34,24 @@ tmpdir=$1
 dir=$2
 outdir=$3
 
+for f in $dir/phones/optional_silence.txt $dir/phones.txt $dir/words.txt ; do
+  [ ! -f $f ] && echo "$0: The file $f must exist!" && exit 1
+done
+
 silphone=`cat $dir/phones/optional_silence.txt` || exit 1;
 
+if [ ! -f $tmpdir/lexicon.txt ] && [ ! -f $tmpdir/lexiconp.txt ] ; then
+  echo "$0: At least one of the files $tmpdir/lexicon.txt or $tmpdir/lexiconp.txt must exist" >&2
+  exit 1
+fi
+
 # Create lexicon with alignment info
 if [ -f $tmpdir/lexicon.txt ] ; then
   cat $tmpdir/lexicon.txt | \
   awk '{printf("%s #1 ", $1); for (n=2; n <= NF; n++) { printf("%s ", $n); } print "#2"; }'
-elif [ -f $tmpdir/lexiconp.txt ] ; then
+else
   cat $tmpdir/lexiconp.txt | \
   awk '{printf("%s #1 ", $1); for (n=3; n <= NF; n++) { printf("%s ", $n); } print "#2"; }'
-else
-  echo "Neither $tmpdir/lexicon.txt nor $tmpdir/lexiconp.txt does not exist"
-  exit 1
 fi | utils/make_lexicon_fst.pl - 0.5 $silphone | \
   fstcompile --isymbols=$dir/phones.txt --osymbols=$dir/words.txt \
   --keep_isymbols=false --keep_osymbols=false | \

From afc5e78c24bb190b413b1b021c5c3e41de91b4cc Mon Sep 17 00:00:00 2001
From: jdieguez
Date: Wed, 6 Feb 2019 21:02:58 +0100
Subject: [PATCH 014/235] [src] Fix to older online decoding code in online/ (OnlineFeInput; was broken by commit cc2469e8).
 (#3025)

---
 src/online/online-feat-input.h | 38 +++++++++++++++++++++++++++++-----
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/src/online/online-feat-input.h b/src/online/online-feat-input.h
index b730a373ac0..e433c386212 100644
--- a/src/online/online-feat-input.h
+++ b/src/online/online-feat-input.h
@@ -31,6 +31,7 @@
 
 #include "online-audio-source.h"
 #include "feat/feature-functions.h"
+#include "feat/feature-window.h"
 
 namespace kaldi {
 
@@ -275,7 +276,8 @@ class OnlineFeInput : public OnlineFeatInputItf {
   // "frame_size" - frame extraction window size in audio samples
   // "frame_shift" - feature frame width in audio samples
   OnlineFeInput(OnlineAudioSourceItf *au_src, E *fe,
-                const int32 frame_size, const int32 frame_shift);
+                const int32 frame_size, const int32 frame_shift,
+                const bool snip_edges = true);
 
   virtual int32 Dim() const { return extractor_->Dim(); }
 
@@ -287,15 +289,26 @@ class OnlineFeInput : public OnlineFeatInputItf {
   const int32 frame_size_;
   const int32 frame_shift_;
   Vector<BaseFloat> wave_;  // the samples to be passed for extraction
+  Vector<BaseFloat> wave_remainder_;  // the samples remaining from the previous
+                                      // feature batch
+  FrameExtractionOptions frame_opts_;
 
   KALDI_DISALLOW_COPY_AND_ASSIGN(OnlineFeInput);
 };
 
 template<class E>
 OnlineFeInput<E>::OnlineFeInput(OnlineAudioSourceItf *au_src, E *fe,
-                                int32 frame_size, int32 frame_shift)
+                                int32 frame_size, int32 frame_shift,
+                                bool snip_edges)
     : source_(au_src), extractor_(fe),
-      frame_size_(frame_size), frame_shift_(frame_shift) {}
+      frame_size_(frame_size), frame_shift_(frame_shift) {
+  // we need a FrameExtractionOptions to call NumFrames()
+  // 1000 is just a fake sample rate which equates ms and samples
+  frame_opts_.samp_freq = 1000;
+  frame_opts_.frame_shift_ms = frame_shift;
+  frame_opts_.frame_length_ms = frame_size;
+  frame_opts_.snip_edges = snip_edges;
+}
 
 template<class E> bool
 OnlineFeInput<E>::Compute(Matrix<BaseFloat> *output) {
@@ -311,11 +324,26 @@ OnlineFeInput<E>::Compute(Matrix<BaseFloat> *output) {
 
   bool ans = source_->Read(&read_samples);
 
+  Vector<BaseFloat> all_samples(wave_remainder_.Dim() + read_samples.Dim());
+  all_samples.Range(0, wave_remainder_.Dim()).CopyFromVec(wave_remainder_);
+  all_samples.Range(wave_remainder_.Dim(), read_samples.Dim()).
+      CopyFromVec(read_samples);
+
   // Extract the features
-  if (read_samples.Dim() >= frame_size_) {
-    extractor_->Compute(read_samples, 1.0, output);
+  if (all_samples.Dim() >= frame_size_) {
+    // extract waveform remainder before calling Compute()
+    int32 num_frames = NumFrames(all_samples.Dim(), frame_opts_);
+    // offset is the amount at the start that has been extracted.
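+    // (Illustrative numbers: with frame_size_ = 25, frame_shift_ = 10 and 100
+    // samples in all_samples, snip_edges == true gives num_frames = 8, so
+    // offset = 80 below and the last 20 samples are carried over in
+    // wave_remainder_ for the next call.)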
+    int32 offset = num_frames * frame_shift_;
+    int32 remaining_len = all_samples.Dim() - offset;
+    wave_remainder_.Resize(remaining_len);
+    KALDI_ASSERT(remaining_len >= 0);
+    if (remaining_len > 0)
+      wave_remainder_.CopyFromVec(SubVector<BaseFloat>(all_samples, offset, remaining_len));
+    extractor_->Compute(all_samples, 1.0, output);
   } else {
     output->Resize(0, 0);
+    wave_remainder_ = all_samples;
   }
 
   return ans;

From 226cbf7bf89ec4b9029eccdb5b5b76074a710ee2 Mon Sep 17 00:00:00 2001
From: Ondrej Platek
Date: Fri, 8 Feb 2019 17:26:08 +0100
Subject: [PATCH 015/235] [script] Fix unset bash variable in make_mfcc.sh (#3030)

---
 egs/wsj/s5/steps/make_mfcc.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/egs/wsj/s5/steps/make_mfcc.sh b/egs/wsj/s5/steps/make_mfcc.sh
index c88e0d65e65..8514ce4e38d 100755
--- a/egs/wsj/s5/steps/make_mfcc.sh
+++ b/egs/wsj/s5/steps/make_mfcc.sh
@@ -75,6 +75,8 @@ if [ -f $data/spk2warp ]; then
   echo "$0 [info]: using VTLN warp factors from $data/spk2warp"
   vtln_opts="--vtln-map=ark:$data/spk2warp"
 elif [ -f $data/utt2warp ]; then
   echo "$0 [info]: using VTLN warp factors from $data/utt2warp"
   vtln_opts="--vtln-map=ark:$data/utt2warp"
+else
+  vtln_opts=""
 fi
 
 for n in $(seq $nj); do

From 6fc4c6020318f59c423eb48e319d67a6bbe91e86 Mon Sep 17 00:00:00 2001
From: Ondrej Platek
Date: Fri, 8 Feb 2019 17:59:36 +0100
Subject: [PATCH 016/235] [scripts] Extend limit_num_gpus.sh to support --num-gpus 0. (#3027)

---
 egs/wsj/s5/utils/parallel/limit_num_gpus.sh | 32 ++++++++++++---------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/egs/wsj/s5/utils/parallel/limit_num_gpus.sh b/egs/wsj/s5/utils/parallel/limit_num_gpus.sh
index d9707a816c4..9d7caddd1f6 100755
--- a/egs/wsj/s5/utils/parallel/limit_num_gpus.sh
+++ b/egs/wsj/s5/utils/parallel/limit_num_gpus.sh
@@ -18,8 +18,8 @@ if [ "$1" == "--num-gpus" ]; then
   shift
 fi
 
-if ! printf "%d" "$num_gpus" >/dev/null || [ $num_gpus -le 0 ]; then
-  echo $0: Must pass a positive interger after --num-gpus
+if ! printf "%d" "$num_gpus" >/dev/null || [ $num_gpus -le -1 ]; then
+  echo $0: Must pass a positive integer or 0 after --num-gpus
   echo e.g. $0 --num-gpus 2 local/tfrnnlm/run_lstm.sh
   exit 1
 fi
@@ -35,18 +35,24 @@ CUDA_VISIBLE_DEVICES=
 num_total_gpus=`nvidia-smi -L | wc -l`
 num_gpus_assigned=0
 
-for i in `seq 0 $[$num_total_gpus-1]`; do
-# going over all GPUs and check if it is idle, and add to the list if yes
-  if nvidia-smi -i $i | grep "No running processes found" >/dev/null; then
-    CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}$i, && num_gpus_assigned=$[$num_gpus_assigned+1]
-  fi
-# once we have enough GPUs, break out of the loop
-  [ $num_gpus_assigned -eq $num_gpus ] && break
-done
+if [ $num_gpus -eq 0 ] ; then
+  echo "$0: Running the job on CPU.
Disabling submitting to gpu" + export CUDA_VISIBLE_DEVICES="" +else + for i in `seq 0 $[$num_total_gpus-1]`; do + # going over all GPUs and check if it is idle, and add to the list if yes + if nvidia-smi -i $i | grep "No running processes found" >/dev/null; then + CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}$i, && num_gpus_assigned=$[$num_gpus_assigned+1] + fi + # once we have enough GPUs, break out of the loop + [ $num_gpus_assigned -eq $num_gpus ] && break + done -[ $num_gpus_assigned -ne $num_gpus ] && echo Could not find enough idle GPUs && exit 1 + [ $num_gpus_assigned -ne $num_gpus ] && echo Could not find enough idle GPUs && exit 1 -export CUDA_VISIBLE_DEVICES=$(echo $CUDA_VISIBLE_DEVICES | sed "s=,$==g") + export CUDA_VISIBLE_DEVICES=$(echo $CUDA_VISIBLE_DEVICES | sed "s=,$==g") + + echo "$0: Running the job on GPU(s) $CUDA_VISIBLE_DEVICES" +fi -echo "$0: Running the job on GPU(s) $CUDA_VISIBLE_DEVICES" "$@" From 2f92bd971b275bb2f147e7cc9ea64c7e40572408 Mon Sep 17 00:00:00 2001 From: Teddyang Date: Fri, 15 Feb 2019 10:49:47 +0800 Subject: [PATCH 017/235] [scripts] fix bug in utils/add_lex_disambig.pl when sil-probs and pron-probs used (#3033) bug would likely have resulted in determinization failure (only when not using word-position-dependent phones). --- egs/wsj/s5/utils/add_lex_disambig.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/egs/wsj/s5/utils/add_lex_disambig.pl b/egs/wsj/s5/utils/add_lex_disambig.pl index dd8a25de6e1..c4277e8dc06 100755 --- a/egs/wsj/s5/utils/add_lex_disambig.pl +++ b/egs/wsj/s5/utils/add_lex_disambig.pl @@ -122,6 +122,7 @@ if ($sil_probs) { shift @A; # Remove silprob shift @A; # Remove silprob + shift @A; # Remove silprob, there three numbers for sil_probs } while(@A > 0) { pop @A; # Remove last phone From 403c5ee02b7de14ddb72944eb1a1a14fd7d07855 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Hernandez?= Date: Mon, 18 Feb 2019 18:26:26 +0100 Subject: [PATCH 018/235] [egs] Fix path in Tedlium r3 rnnlm training script (#3039) --- egs/tedlium/s5_r3/local/rnnlm/tuning/run_lstm_tdnn_a.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/tedlium/s5_r3/local/rnnlm/tuning/run_lstm_tdnn_a.sh b/egs/tedlium/s5_r3/local/rnnlm/tuning/run_lstm_tdnn_a.sh index 32252db937d..73a684b6379 100755 --- a/egs/tedlium/s5_r3/local/rnnlm/tuning/run_lstm_tdnn_a.sh +++ b/egs/tedlium/s5_r3/local/rnnlm/tuning/run_lstm_tdnn_a.sh @@ -30,7 +30,6 @@ epochs=20 [ -z "$cmd" ] && cmd=$train_cmd text_from_audio=data/train/text -text=data/LM/train.txt wordlist=data/lang_chain/words.txt dev_sents=10000 text_dir=data/rnnlm/text @@ -44,8 +43,9 @@ done if [ $stage -le 0 ]; then mkdir -p $text_dir + gunzip -c db/TEDLIUM_release-3/LM/*.en.gz | sed 's/ <\/s>//g' > $text_dir/train.txt # shuffle text from audio and lm - cat $text_from_audio | cut -d ' ' -f2- | cat $text |\ + cat $text_from_audio | cut -d ' ' -f2- | cat $text_dir/train.txt |\ shuf > data/rnnlm/full_lm_data.shuffled # create dev and train sets based on audio and LM data cat data/rnnlm/full_lm_data.shuffled | head -n $dev_sents> $text_dir/dev.txt From cbc8eeb3a4e75fa4d3679ece969a9a65c99bb8ad Mon Sep 17 00:00:00 2001 From: saikiran valluri Date: Tue, 19 Feb 2019 00:02:18 -0500 Subject: [PATCH 019/235] Spanish Gigaword LM recipe --- .../s5_gigaword/cmd.sh | 15 + .../s5_gigaword/conf/decode.config | 6 + .../s5_gigaword/conf/mfcc.conf | 2 + .../s5_gigaword/conf/mfcc_hires.conf | 10 + .../s5_gigaword/conf/online_cmvn.conf | 1 + .../s5_gigaword/conf/plp.conf | 2 + .../local/callhome_create_splits.sh | 31 
+ .../s5_gigaword/local/callhome_data_prep.sh | 163 ++++ .../s5_gigaword/local/callhome_get_1_best.py | 75 ++ .../local/callhome_get_lattices.py | 115 +++ .../local/callhome_make_spk2gender.sh | 29 + .../s5_gigaword/local/callhome_make_trans.pl | 74 ++ .../s5_gigaword/local/callhome_text_pp.sh | 9 + .../s5_gigaword/local/chain/run_tdnn_1g.sh | 288 +++++++ .../s5_gigaword/local/clean_txt_dir.sh | 51 ++ .../s5_gigaword/local/create_oracle_ctm.sh | 30 + .../s5_gigaword/local/create_splits.sh | 30 + .../s5_gigaword/local/ctm.sh | 34 + .../s5_gigaword/local/decode_report.py | 148 ++++ .../s5_gigaword/local/find_unique_phones.pl | 25 + .../s5_gigaword/local/fix_stm.sh | 10 + .../flatten_gigaword/flatten_all_gigaword.sh | 15 + .../flatten_gigaword/flatten_one_gigaword.py | 61 ++ .../local/flatten_gigaword/run_flat.sh | 17 + .../s5_gigaword/local/fsp_create_test_lang.sh | 49 ++ .../s5_gigaword/local/fsp_data_prep.sh | 175 ++++ .../local/fsp_ideal_data_partitions.pl | 85 ++ .../s5_gigaword/local/fsp_make_spk2gender.sh | 29 + .../s5_gigaword/local/fsp_make_trans.pl | 81 ++ .../s5_gigaword/local/fsp_prepare_dict.sh | 142 ++++ .../s5_gigaword/local/fsp_train_lms.sh | 140 ++++ .../s5_gigaword/local/get_1_best.py | 62 ++ .../s5_gigaword/local/get_data_weights.pl | 39 + .../s5_gigaword/local/get_lattices.py | 115 +++ .../s5_gigaword/local/get_oracle.sh | 32 + .../s5_gigaword/local/isolate_phones.pl | 66 ++ .../s5_gigaword/local/latconvert.sh | 124 +++ .../s5_gigaword/local/merge_lexicons.py | 65 ++ .../s5_gigaword/local/monitor_denlats.sh | 31 + .../local/nnet3/run_ivector_common.sh | 187 +++++ .../s5_gigaword/local/pocolm_cust.sh | 117 +++ .../s5_gigaword/local/process_oracle.py | 64 ++ .../s5_gigaword/local/rescore.sh | 24 + .../s5_gigaword/local/rnnlm.sh | 84 ++ .../s5_gigaword/local/run_norm.sh | 33 + .../s5_gigaword/local/run_sgmm2x.sh | 57 ++ .../s5_gigaword/local/score.sh | 1 + .../s5_gigaword/local/score_oracle.sh | 29 + .../s5_gigaword/local/splits/dev | 20 + .../local/splits/split_callhome/dev | 20 + .../local/splits/split_callhome/test | 20 + .../local/splits/split_callhome/train | 80 ++ .../s5_gigaword/local/splits/split_fisher/dev | 20 + .../local/splits/split_fisher/dev2 | 20 + .../local/splits/split_fisher/test | 20 + .../local/splits/split_fisher/train | 759 ++++++++++++++++++ .../s5_gigaword/local/splits/test | 20 + .../s5_gigaword/local/splits/train | 80 ++ .../s5_gigaword/local/spron.pl | 304 +++++++ .../s5_gigaword/local/subset_data_prep.sh | 164 ++++ .../s5_gigaword/local/train_get_1_best.py | 79 ++ .../s5_gigaword/local/train_get_lattices.py | 125 +++ .../s5_gigaword/local/train_pocolm.sh | 39 + .../s5_gigaword/local/train_process_oracle.py | 79 ++ .../s5_gigaword/local/wer_output_filter | 5 + .../s5_gigaword/path.sh | 13 + .../s5_gigaword/path_venv.sh | 13 + egs/fisher_callhome_spanish/s5_gigaword/rnnlm | 1 + .../s5_gigaword/run.sh | 299 +++++++ egs/fisher_callhome_spanish/s5_gigaword/steps | 1 + egs/fisher_callhome_spanish/s5_gigaword/utils | 1 + 71 files changed, 5254 insertions(+) create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/cmd.sh create mode 100644 egs/fisher_callhome_spanish/s5_gigaword/conf/decode.config create mode 100644 egs/fisher_callhome_spanish/s5_gigaword/conf/mfcc.conf create mode 100644 egs/fisher_callhome_spanish/s5_gigaword/conf/mfcc_hires.conf create mode 100644 egs/fisher_callhome_spanish/s5_gigaword/conf/online_cmvn.conf create mode 100644 egs/fisher_callhome_spanish/s5_gigaword/conf/plp.conf create mode 100755 
egs/fisher_callhome_spanish/s5_gigaword/local/callhome_create_splits.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/callhome_data_prep.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/callhome_get_1_best.py create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/callhome_get_lattices.py create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/callhome_make_spk2gender.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/callhome_make_trans.pl create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/callhome_text_pp.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/chain/run_tdnn_1g.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/clean_txt_dir.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/create_oracle_ctm.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/create_splits.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/ctm.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/decode_report.py create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/find_unique_phones.pl create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/fix_stm.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/flatten_all_gigaword.sh create mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/flatten_one_gigaword.py create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/run_flat.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/fsp_create_test_lang.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/fsp_data_prep.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/fsp_ideal_data_partitions.pl create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/fsp_make_spk2gender.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/fsp_make_trans.pl create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/fsp_prepare_dict.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/fsp_train_lms.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/get_1_best.py create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/get_data_weights.pl create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/get_lattices.py create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/get_oracle.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/isolate_phones.pl create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/latconvert.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/merge_lexicons.py create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/monitor_denlats.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/nnet3/run_ivector_common.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/pocolm_cust.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/process_oracle.py create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/rescore.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/rnnlm.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/run_norm.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/run_sgmm2x.sh create mode 120000 egs/fisher_callhome_spanish/s5_gigaword/local/score.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/score_oracle.sh create mode 100644 
egs/fisher_callhome_spanish/s5_gigaword/local/splits/dev create mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/dev create mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/test create mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/train create mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/dev create mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/dev2 create mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/test create mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/train create mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/splits/test create mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/splits/train create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/spron.pl create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/subset_data_prep.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/train_get_1_best.py create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/train_get_lattices.py create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/train_process_oracle.py create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/wer_output_filter create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/path.sh create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/path_venv.sh create mode 120000 egs/fisher_callhome_spanish/s5_gigaword/rnnlm create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/run.sh create mode 120000 egs/fisher_callhome_spanish/s5_gigaword/steps create mode 120000 egs/fisher_callhome_spanish/s5_gigaword/utils diff --git a/egs/fisher_callhome_spanish/s5_gigaword/cmd.sh b/egs/fisher_callhome_spanish/s5_gigaword/cmd.sh new file mode 100755 index 00000000000..0511bd2bbb0 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/cmd.sh @@ -0,0 +1,15 @@ +# you can change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. + +export train_cmd="retry.pl queue.pl" +export decode_cmd="retry.pl queue.pl --mem 8G" +export mkgraph_cmd="queue.pl --mem 8G" diff --git a/egs/fisher_callhome_spanish/s5_gigaword/conf/decode.config b/egs/fisher_callhome_spanish/s5_gigaword/conf/decode.config new file mode 100644 index 00000000000..7908f178373 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/conf/decode.config @@ -0,0 +1,6 @@ +# Use wider-than-normal decoding beams. 
+first_beam=16.0 +beam=20.0 +lat_beam=10.0 +min_lmwt=2 +max_lmwt=10 diff --git a/egs/fisher_callhome_spanish/s5_gigaword/conf/mfcc.conf b/egs/fisher_callhome_spanish/s5_gigaword/conf/mfcc.conf new file mode 100644 index 00000000000..ffb41a1aae4 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/conf/mfcc.conf @@ -0,0 +1,2 @@ +--use-energy=false # only non-default option. +--sample-frequency=8000 diff --git a/egs/fisher_callhome_spanish/s5_gigaword/conf/mfcc_hires.conf b/egs/fisher_callhome_spanish/s5_gigaword/conf/mfcc_hires.conf new file mode 100644 index 00000000000..d870ab04c38 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/conf/mfcc_hires.conf @@ -0,0 +1,10 @@ +# config for high-resolution MFCC features, intended for neural network training. +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--sample-frequency=8000 # Switchboard is sampled at 8kHz +--num-mel-bins=40 # similar to Google's setup. +--num-ceps=40 # there is no dimensionality reduction. +--low-freq=40 # low cutoff frequency for mel bins +--high-freq=-200 # high cutoff frequently, relative to Nyquist of 4000 (=3800) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/conf/online_cmvn.conf b/egs/fisher_callhome_spanish/s5_gigaword/conf/online_cmvn.conf new file mode 100644 index 00000000000..7748a4a4dd3 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/conf/online_cmvn.conf @@ -0,0 +1 @@ +# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh diff --git a/egs/fisher_callhome_spanish/s5_gigaword/conf/plp.conf b/egs/fisher_callhome_spanish/s5_gigaword/conf/plp.conf new file mode 100644 index 00000000000..c4b73674cab --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/conf/plp.conf @@ -0,0 +1,2 @@ +# No non-default options for now. + diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_create_splits.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_create_splits.sh new file mode 100755 index 00000000000..07814da46a9 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_create_splits.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# Copyright 2014 Gaurav Kumar. Apache 2.0 + +data_dir=data +train_all=data/callhome_train_all + +if [ $# -lt 1 ]; then + echo "Specify the location of the split files" + exit 1; +fi + +splitFile=$1 + +# Train first +for split in train dev test +do + dirName=callhome_$split + + cp -r $train_all $data_dir/$dirName + + awk 'BEGIN {FS=" "}; FNR==NR { a[$1]; next } ((substr($2,0,length($2)-2) ".sph") in a)' \ + $splitFile/$split $train_all/segments > $data_dir/$dirName/segments + + n=`awk 'BEGIN {FS = " "}; {print substr($2,0,length($2)-2)}' $data_dir/$dirName/segments | sort | uniq | wc -l` + + echo "$n conversations left in split $dirName" + + utils/fix_data_dir.sh $data_dir/$dirName + utils/validate_data_dir.sh $data_dir/$dirName +done + diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_data_prep.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_data_prep.sh new file mode 100755 index 00000000000..f61b0fa9519 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_data_prep.sh @@ -0,0 +1,163 @@ +#!/bin/bash +# +# Copyright 2014 Gaurav Kumar. Apache 2.0 +# The input is the Callhome Spanish Dataset. 
(*.sph files) +# In addition the transcripts are needed as well. +# To be run from one directory above this script. + +# Note: when creating your own data preparation scripts, it's a good idea +# to make sure that the speaker id (if present) is a prefix of the utterance +# id, that the output scp file is sorted on utterance id, and that the +# transcription file is exactly the same length as the scp file and is also +# sorted on utterance id (missing transcriptions should be removed from the +# scp file using e.g. scripts/filter_scp.pl) + +stage=0 + +export LC_ALL=C + + +if [ $# -lt 2 ]; then + echo "Arguments should be the location of the Callhome Spanish Speech and Transcript Directories, se +e ../run.sh for example." + exit 1; +fi + +cdir=`pwd` +dir=`pwd`/data/local/data +local=`pwd`/local +utils=`pwd`/utils +tmpdir=`pwd`/data/local/tmp + +. ./path.sh || exit 1; # Needed for KALDI_ROOT +export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin +sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe +if [ ! -x $sph2pipe ]; then + echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; + exit 1; +fi +cd $dir + +# Make directory of links to the WSJ disks such as 11-13.1. This relies on the command +# line arguments being absolute pathnames. +#rm -r links/ 2>/dev/null +mkdir -p links/ +ln -s $* links + +# Basic spot checks to see if we got the data that we needed +if [ ! -d links/LDC96S35 -o ! -d links/LDC96T17 ]; +then + echo "The speech and the data directories need to be named LDC96S35 and LDC96T17 respecti +vely" + exit 1; +fi + +if [ ! -d links/LDC96S35/CALLHOME/SPANISH/SPEECH/DEVTEST -o ! -d links/LDC96S35/CALLHOME/SPANISH/SPEECH/EVLTEST -o ! -d links/LDC96S35/CALLHOME/SPANISH/SPEECH/TRAIN ]; +then + echo "Dev, Eval or Train directories missing or not properly organised within the speech data dir" + exit 1; +fi + +#Check the transcripts directories as well to see if they exist +if [ ! -d links/LDC96T17/callhome_spanish_trans_970711/transcrp/devtest -o ! -d links/LDC96T17/callhome_spanish_trans_970711/transcrp/evltest -o ! 
-d links/LDC96T17/callhome_spanish_trans_970711/transcrp/train ] +then + echo "Transcript directories missing or not properly organised" + exit 1; +fi + +speech_train=$dir/links/LDC96S35/CALLHOME/SPANISH/SPEECH/TRAIN +speech_dev=$dir/links/LDC96S35/CALLHOME/SPANISH/SPEECH/DEVTEST +speech_test=$dir/links/LDC96S35/CALLHOME/SPANISH/SPEECH/EVLTEST +transcripts_train=$dir/links/LDC96T17/callhome_spanish_trans_970711/transcrp/train +transcripts_dev=$dir/links/LDC96T17/callhome_spanish_trans_970711/transcrp/devtest +transcripts_test=$dir/links/LDC96T17/callhome_spanish_trans_970711/transcrp/evltest + +fcount_train=`find ${speech_train} -iname '*.SPH' | wc -l` +fcount_dev=`find ${speech_dev} -iname '*.SPH' | wc -l` +fcount_test=`find ${speech_test} -iname '*.SPH' | wc -l` +fcount_t_train=`find ${transcripts_train} -iname '*.txt' | wc -l` +fcount_t_dev=`find ${transcripts_dev} -iname '*.txt' | wc -l` +fcount_t_test=`find ${transcripts_test} -iname '*.txt' | wc -l` + +#Now check if we got all the files that we needed +if [ $fcount_train != 80 -o $fcount_dev != 20 -o $fcount_test != 20 -o $fcount_t_train != 80 -o $fcount_t_dev != 20 -o $fcount_t_test != 20 ]; +then + echo "Incorrect number of files in the data directories" + echo "The paritions should contain 80/20/20 files" + exit 1; +fi + +if [ $stage -le 0 ]; then + #Gather all the speech files together to create a file list + ( + find $speech_train -iname '*.sph'; + find $speech_dev -iname '*.sph'; + find $speech_test -iname '*.sph'; + ) > $tmpdir/callhome_train_sph.flist + + #Get all the transcripts in one place + + ( + find $transcripts_train -iname '*.txt'; + find $transcripts_dev -iname '*.txt'; + find $transcripts_test -iname '*.txt'; + ) > $tmpdir/callhome_train_transcripts.flist + +fi + +if [ $stage -le 1 ]; then + $local/callhome_make_trans.pl $tmpdir + mkdir -p $dir/callhome_train_all + mv $tmpdir/callhome_reco2file_and_channel $dir/callhome_train_all/ +fi + +if [ $stage -le 2 ]; then + sort $tmpdir/callhome.text.1 | sed 's/^\s\s*|\s\s*$//g' | sed 's/\s\s*/ /g' > $dir/callhome_train_all/callhome.text + + #Create segments file and utt2spk file + ! cat $dir/callhome_train_all/callhome.text | perl -ane 'm:([^-]+)-([AB])-(\S+): || die "Bad line $_;"; print "$1-$2-$3 $1-$2\n"; ' > $dir/callhome_train_all/callhome_utt2spk \ + && echo "Error producing utt2spk file" && exit 1; + + cat $dir/callhome_train_all/callhome.text | perl -ane 'm:((\S+-[AB])-(\d+)-(\d+))\s: || die; $utt = $1; $reco = $2; + $s = sprintf("%.2f", 0.01*$3); $e = sprintf("%.2f", 0.01*$4); print "$utt $reco $s $e\n"; ' >$dir/callhome_train_all/callhome_segments + + $utils/utt2spk_to_spk2utt.pl <$dir/callhome_train_all/callhome_utt2spk > $dir/callhome_train_all/callhome_spk2utt +fi + +if [ $stage -le 3 ]; then + for f in `cat $tmpdir/callhome_train_sph.flist`; do + # convert to absolute path + make_absolute.sh $f + done > $tmpdir/callhome_train_sph_abs.flist + + cat $tmpdir/callhome_train_sph_abs.flist | perl -ane 'm:/([^/]+)\.SPH$: || die "bad line $_; "; print lc($1)," $_"; ' > $tmpdir/callhome_sph.scp + cat $tmpdir/callhome_sph.scp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2);}' | \ + sort -k1,1 -u > $dir/callhome_train_all/callhome_wav.scp || exit 1; +fi + +if [ $stage -le 4 ]; then + # Build the speaker to gender map, the temporary file with the speaker in gender information is already created by fsp_make_trans.pl. 
+ cd $cdir + #TODO: needs to be rewritten + $local/callhome_make_spk2gender.sh > $dir/callhome_train_all/callhome_spk2gender +fi + +# Rename files from the callhome directory +if [ $stage -le 5 ]; then + cd $dir/callhome_train_all + mv callhome.text text + mv callhome_segments segments + mv callhome_spk2utt spk2utt + mv callhome_wav.scp wav.scp + mv callhome_reco2file_and_channel reco2file_and_channel + mv callhome_spk2gender spk2gender + mv callhome_utt2spk utt2spk + cd $cdir +fi + +fix_data_dir.sh $dir/callhome_train_all || exit 1 +utils/validate_data_dir.sh --no-feats $dir/callhome_train_all || exit 1 + +echo "CALLHOME spanish Data preparation succeeded." + +exit 0; diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_get_1_best.py b/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_get_1_best.py new file mode 100755 index 00000000000..a81818c2858 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_get_1_best.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python + +# Copyright 2014 Gaurav Kumar. Apache 2.0 +# Extracts one best output for a set of files +# The list of files in the conversations for which 1 best output has to be extracted +# words.txt + +import os +import sys + +def findTranscription(timeDetail): + file1 = open('exp/tri5a/decode_callhome_dev/scoring/13.tra') + file2 = open('exp/tri5a/decode_callhome_train/scoring/13.tra') + for line in file1: + lineComp = line.split() + if lineComp[0] == timeDetail: + return " ".join(lineComp[1:]) + for line in file2: + lineComp = line.split() + if lineComp[0] == timeDetail: + return " ".join(lineComp[1:]) + # No result found + return -1 + + +wordsFile = open('exp/tri5a/graph/words.txt') +words = {} + +# Extract word list +for line in wordsFile: + lineComp = line.split() + words[int(lineComp[1])] = lineComp[0].strip() + +# Now read list of files in conversations +fileList = [] +#conversationList = open('/export/a04/gkumar/corpora/fishcall/joshkal-splits/provisional_dev') +conversationList = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-callhome/train') +for line in conversationList: + line = line.strip() + line = line[:-4] + fileList.append(line) + +# IN what order were the conversations added to the spanish files? 
+# TODO: Make sure they match the order in which these english files are being written + +# Now get timing information to concatenate the ASR outputs +if not os.path.exists('exp/tri5a/one-best/ch_train'): + os.makedirs('exp/tri5a/one-best/ch_train') + +#provFile = open('/export/a04/gkumar/corpora/fishcall/fisher_provisional_dev.es', 'w+') +provFile = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-callhome/asr.train', 'w+') +for item in fileList: + timingFile = open('/export/a04/gkumar/corpora/fishcall/callhome/tim/' + item + '.es') + newFile = open('exp/tri5a/one-best/ch_train/' + item + '.es', 'w+') + for line in timingFile: + timeInfo = line.split() + mergedTranslation = "" + for timeDetail in timeInfo: + #Locate this in ASR dev/test, this is going to be very slow + tmp = findTranscription(timeDetail) + if tmp != -1: + mergedTranslation = mergedTranslation + " " + tmp + mergedTranslation = mergedTranslation.strip() + transWords = [words[int(x)] for x in mergedTranslation.split()] + newFile.write(" ".join(transWords) + "\n") + provFile.write(" ".join(transWords) + "\n") + + newFile.close() +provFile.close() + + + + + + diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_get_lattices.py b/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_get_lattices.py new file mode 100755 index 00000000000..4c96e01ce7e --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_get_lattices.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python + +# Copyright 2014 Gaurav Kumar. Apache 2.0 +# Extracts one best output for a set of files +# The list of files in the conversations for which 1 best output has to be extracted +# words.txt + +from __future__ import print_function +import os +import sys +import subprocess + +latticeLocation = 'latjosh-2-callhome/lattices-pushed/' + +tmpdir = 'data/local/data/tmp/ch-d/lattmp' +invalidplfdir = 'data/local/data/tmp/ch-d/invalidplf' +symtable = '/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-matt/data/lang/words.clean.txt' + +conversationList = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-callhome/dev') +provFile = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-callhome/ch-d/asr.test.plf', 'w+') +invalidPLF = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-callhome/ch-d/invalidPLF', 'w+') +blankPLF = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-callhome/ch-d/blankPLF', 'w+') +rmLines = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-callhome/ch-d/removeLines', 'w+') + +if not os.path.exists(tmpdir): + os.makedirs(tmpdir) +if not os.path.exists(invalidplfdir): + os.makedirs(invalidplfdir) +else: + os.system("rm " + invalidplfdir + "/*") + +def latticeConcatenate(lat1, lat2): + ''' + Concatenates lattices, writes temporary results to tmpdir + ''' + if lat1 == "": + os.system('rm ' + tmpdir + '/tmp.lat') + return lat2 + else: + proc = subprocess.Popen(['fstconcat', lat1, lat2, (tmpdir + '/tmp.lat')]) + proc.wait() + return tmpdir + '/tmp.lat' + + +def findLattice(timeDetail): + ''' + Finds the lattice corresponding to a time segment + ''' + if os.path.isfile(latticeLocation + timeDetail + '.lat'): + return latticeLocation + timeDetail + '.lat' + else: + return -1 + + +# Now read list of files in conversations +fileList = [] +for line in conversationList: + line = line.strip() + line = line[:-4] + fileList.append(line) + +# IN what order were the conversations added to the spanish files? 
+# Now get timing information to concatenate the ASR outputs + +lineNo = 1 +for item in fileList: + timingFile = open('/export/a04/gkumar/corpora/fishcall/callhome/tim/' + item + '.es') + for line in timingFile: + timeInfo = line.split() + + # For utterances that are concatenated in the translation file, + # the corresponding FSTs have to be translated as well + mergedTranslation = "" + for timeDetail in timeInfo: + tmp = findLattice(timeDetail) + if tmp != -1: + # Concatenate lattices + mergedTranslation = latticeConcatenate(mergedTranslation, tmp) + + print(mergedTranslation) + if mergedTranslation != "": + + # Sanjeev's Recipe : Remove epsilons and topo sort + finalFST = tmpdir + "/final.fst" + os.system("fstrmepsilon " + mergedTranslation + " | fsttopsort - " + finalFST) + + # Now convert to PLF + proc = subprocess.Popen('/export/a04/gkumar/corpora/fishcall/bin/fsm2plf.sh ' + symtable + ' ' + finalFST, stdout=subprocess.PIPE, shell=True) + PLFline = proc.stdout.readline() + finalPLFFile = tmpdir + "/final.plf" + finalPLF = open(finalPLFFile, "w+") + finalPLF.write(PLFline) + finalPLF.close() + + # now check if this is a valid PLF, if not write it's ID in a + # file so it can be checked later + proc = subprocess.Popen("/export/a04/gkumar/moses/mosesdecoder/checkplf < " + finalPLFFile + " 2>&1 | awk 'FNR == 2 {print}'", stdout=subprocess.PIPE, shell=True) + line = proc.stdout.readline() + print("{} {}".format(line, lineNo)) + if line.strip() != "PLF format appears to be correct.": + os.system("cp " + finalFST + " " + invalidplfdir + "/" + timeInfo[0]) + invalidPLF.write(invalidplfdir + "/" + timeInfo[0] + "\n") + rmLines.write("{}\n".format(lineNo)) + else: + provFile.write(PLFline) + else: + blankPLF.write(timeInfo[0] + "\n") + rmLines.write("{}\n".format(lineNo)) + # Now convert to PLF + lineNo += 1 + +provFile.close() +invalidPLF.close() +blankPLF.close() +rmLines.close() diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_make_spk2gender.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_make_spk2gender.sh new file mode 100755 index 00000000000..d06e5fe911f --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_make_spk2gender.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env python + +# Copyright 2014 Gaurav Kumar. Apache 2.0 +# Gets the unique speakers from the file created by fsp_make_trans.pl +# Note that if a speaker appears multiple times, it is categorized as female + +import os +import sys + +tmpFileLocation = 'data/local/tmp/callhome_spk2gendertmp' + +tmpFile = None + +try: + tmpFile = open(tmpFileLocation) +except IOError: + print 'The file spk2gendertmp does not exist. Run fsp_make_trans.pl first?' + +speakers = {} + +for line in tmpFile: + comp = line.split(' ') + if comp[0] in speakers: + speakers[comp[0]] = "f" + else: + speakers[comp[0]] = comp[1] + +for speaker, gender in speakers.iteritems(): + print speaker + " " + gender diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_make_trans.pl b/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_make_trans.pl new file mode 100755 index 00000000000..ec3dfd88037 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_make_trans.pl @@ -0,0 +1,74 @@ +#!/usr/bin/env perl +# Copyright 2014 Gaurav Kumar. 
Apache 2.0 + +use utf8; +use File::Basename; + +($tmpdir)=@ARGV; +$trans="$tmpdir/callhome_train_transcripts.flist"; +$reco="$tmpdir/callhome_reco2file_and_channel"; +open(T, "<", "$trans") || die "Can't open transcripts file"; +open(R, "|sort >$reco") || die "Can't open reco2file_and_channel file $!"; +open(O, ">$tmpdir/callhome.text.1") || die "Can't open text file for writing"; +open(G, ">$tmpdir/callhome_spk2gendertmp") || die "Can't open the speaker to gender map file"; +binmode(O, ":utf8"); +while () { + $file = $_; + m:([^/]+)\.txt: || die "Bad filename $_"; + $call_id = $1; + print R "$call_id-A $call_id A\n"; + print R "$call_id-B $call_id B\n"; + open(I, "<$file") || die "Opening file $_"; + binmode(I, ":iso88591"); + #Now read each line and extract information + while () { + #136.37 138.10 B: Ah, bueno, mamita. + chomp; + + my @stringComponents = split(":", $_, 2); + my @timeInfo = split(" ", $stringComponents[0]); + $stringComponents[1] =~ s/^\s+|\s+$//g ; + my $words = $stringComponents[1]; + #Check number of components in this array + if ((scalar @stringComponents) >= 2) { + $start = sprintf("%06d", $timeInfo[0] * 100); + $end = sprintf("%06d", $timeInfo[1] * 100); + length($end) > 6 && die "Time too long $end in $file"; + $side = "A"; + if (index($timeInfo[2], "B") != -1) { + $side = "B"; + } + $utt_id = "${call_id}-$side-$start-$end"; + $speaker_id = "${call_id}-$side"; + # All speakers are treated as male because speaker gender info + # is missing in this file + $gender = "m"; + print G "$speaker_id $gender\n" || die "Error writing to speaker2gender file"; + $words =~ s|\[\[[^]]*\]\]||g; #removes comments + $words =~ s|\{laugh\}|\$laughter\$|g; # replaces laughter tmp + $words =~ s|\[laugh\]|\$laughter\$|g; # replaces laughter tmp + $words =~ s|\{[^}]*\}|\[noise\]|g; # replaces noise + $words =~ s|\[[^]]*\]|\[noise\]|g; # replaces noise + $words =~ s|\[/*([^]]*)\]|\[noise\]|g; # replaces end of noise + $words =~ s|\$laughter\$|\[laughter\]|g; # replaces laughter again + $words =~ s|\(\(([^)]*)\)\)|\1|g; # replaces unintelligible speech + $words =~ s|<\?([^>]*)>|\1|g; # for unrecognized language + $words =~ s|background speech|\[noise\]|g; + $words =~ s|background noise|\[noise\]|g; + $words =~ s/\[/larrow/g; + $words =~ s/\]/rarrow/g; + $words =~ s/[[:punct:]]//g; + $words =~ s/larrow/\[/g; + $words =~ s/rarrow/\]/g; + $words =~ s/[¿¡]//g; + $words =~ s/\h+/ /g; # horizontal whitespace characters + $words = lc($words); + print O "$utt_id $words\n" || die "Error writing to text file"; + } + } + close(I); +} +close(T); +close(R); +close(O); +close(G); diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_text_pp.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_text_pp.sh new file mode 100755 index 00000000000..37e1eca1687 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_text_pp.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Copyright 2014 Gaurav Kumar. Apache 2.0 + +if [ $# -gt 0 ]; then + sentence=$1 + echo $sentence | sed 's:{^[}]*}:[noise]:' +fi + + diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/chain/run_tdnn_1g.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/chain/run_tdnn_1g.sh new file mode 100755 index 00000000000..c487f1bd222 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/chain/run_tdnn_1g.sh @@ -0,0 +1,288 @@ +#!/bin/bash + +# 1g is like 1f but upgrading to a "resnet-style TDNN-F model", i.e. +# with bypass resnet connections, and re-tuned. 
+# compute-wer --text --mode=present ark:exp/chain/multipsplice_tdnn/decode_fsp_train_test/scoring_kaldi/test_filt.txt ark,p:- +# %WER 22.21 [ 8847 / 39831, 1965 ins, 2127 del, 4755 sub ] +# %SER 56.98 [ 3577 / 6278 ] +# Scored 6278 sentences, 0 not present in hyp. + +# steps/info/chain_dir_info.pl exp/chain/multipsplice_tdnn +# exp/chain/multipsplice_tdnn: num-iters=296 nj=1..2 num-params=8.2M dim=40+100->2489 combine=-0.170->-0.165 (over 8) xent:train/valid[196,295,final]=(-2.30,-1.93,-1.83/-2.24,-1.96,-1.86) logprob:train/valid[196,295,final]=(-0.208,-0.169,-0.164/-0.189,-0.161,-0.158) + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +train_set=train +test_sets="test dev" +gmm=tri5a # this is the source gmm-dir that we'll use for alignments; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. + +# Options which are not passed through to run_ivector_common.sh +affix=1g #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +common_egs_dir= +reporting_email= + +# LSTM/chain options +train_stage=-10 +xent_regularize=0.1 +dropout_schedule='0,0@0.20,0.3@0.50,0' + +# training chunk-options +chunk_width=140,100,160 +# we don't need extra left/right context for TDNN systems. +chunk_left_context=0 +chunk_right_context=0 + +# training options +srand=0 +remove_egs=true + +#decode options +test_online_decoding=false # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 17 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 18 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 19 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + tdnn_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim-continuous=true" + tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" + linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" + prefinal_opts="l2-regularize=0.01" + output_opts="l2-regularize=0.005" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-dropout-layer name=tdnn1 $tdnn_opts dim=1024 + tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=0 + tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + linear-component name=prefinal-l dim=192 $linear_opts + + + prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1024 small-dim=192 + output-layer name=output include-log-softmax=false dim=$num_targets $output_opts + + prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1024 small-dim=192 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 20 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.0 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.srand $srand \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.frames-per-iter 5000000 \ + --trainer.optimization.num-jobs-initial 1 \ + --trainer.optimization.num-jobs-final=2 \ + --trainer.optimization.initial-effective-lrate 0.0005 \ + --trainer.optimization.final-effective-lrate 0.00005 \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.optimization.momentum 0.0 \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context 0 \ + --egs.chunk-right-context 0 \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --cleanup.remove-egs $remove_egs \ + --use-gpu true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir exp/tri5a_lats_nodup_sp \ + --dir $dir || exit 1; +fi + +if [ $stage -le 21 ]; then + # The reason we are using data/lang_test here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + #LM was trained only on Fisher Spanish train subset. + + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_test \ + $tree_dir $tree_dir/graph_fsp_train || exit 1; + +fi + +rnnlmdir=exp/rnnlm_lstm_tdnn_1b +if [ $stage -le 22 ]; then + local/rnnlm/train_rnnlm.sh --dir $rnnlmdir || exit 1; +fi + +if [ $stage -le 23 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l " + exit 1; +fi + +txtdir=$1 +textdir=$(realpath $txtdir) +outdir=$(realpath $2) + +workdir=$outdir/tmp +if [ $stage -le 0 ]; then + rm -rf $outdir + mkdir -p $workdir + mkdir -p $textdir/splits + mkdir -p $outdir/data + split -l 1000000 $textdir/in.txt $textdir/splits/out + numsplits=0 + for x in $textdir/splits/*; do + numsplits=$((numsplits+1)) + ln -s $x $outdir/data/$numsplits + done + echo $numsplits + cp $SPARROWHAWK_ROOT/documentation/grammars/sentence_boundary_exceptions.txt . 
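+  # (The corpus was split into 1M-line chunks above; each chunk is now
+  # normalized with Sparrowhawk as one task of a parallel array job, the
+  # per-chunk outputs are concatenated into text_normalized, and any digits
+  # that survive normalization are collected below, presumably for manual
+  # inspection.)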
+ $train_cmd --max_jobs_run 100 JOB=1:$numsplits $outdir/sparrowhawk/log/JOB.log \ + local/run_norm.sh \ + sparrowhawk_configuration.ascii_proto \ + $SPARROWHAWK_ROOT/language-resources/en/sparrowhawk/ \ + $outdir/data \ + JOB \ + $outdir/sparrowhawk/ + cat $outdir/sparrowhawk/*.txt | sed "/^$/d" > $outdir/text_normalized + + # check if numbers are there in normalized output + awk '{for(i=1;i<=NF;i++) {if (!seen[$i]) {print $i; seen[$i]=1} }}' \ + $outdir/text_normalized > $outdir/unique_words + grep "[0-9]" $outdir/unique_words | sort -u > $outdir/numbers +fi diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/create_oracle_ctm.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/create_oracle_ctm.sh new file mode 100755 index 00000000000..d48a96db5c4 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/create_oracle_ctm.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +# Copyright 2014 Gaurav Kumar. Apache 2.0 + +# No sanity checks here, they need to be added + +data=data/callhome_test +dir=exp/tri5a/decode_callhome_test +lang=data/lang +LMWT=13 + +[ -f ./path.sh ] && . ./path.sh + +cmd=run.pl +filter_cmd="utils/convert_ctm.pl $data/segments $data/reco2file_and_channel" +name=`basename $data`; +model=$dir/../final.mdl # assume model one level up from decoding dir. +symTable=$lang/words.txt + +if [ ! -f $dir/oracle/oracle.lat.gz ]; then + cat $data/text | utils/sym2int.pl --map-oov [oov] -f 2- $symTable | \ + lattice-oracle --write-lattices="ark:|gzip -c > $dir/oracle/oracle.lat.gz" \ + "ark:gunzip -c $dir/lat.*.gz|" ark:- ark:- > /dev/null 2>&1 +fi + +lattice-align-words $lang/phones/word_boundary.int $model \ + "ark:gunzip -c $dir/oracle/oracle.lat.gz|" ark:- | \ + lattice-1best --lm-scale=$LMWT ark:- ark:- | nbest-to-ctm ark:- - | \ + utils/int2sym.pl -f 5 $lang/words.txt | \ + utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ + > $dir/oracle/$name.ctm diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/create_splits.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/create_splits.sh new file mode 100755 index 00000000000..8a60dc9d422 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/create_splits.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +# Copyright 2014 Gaurav Kumar. Apache 2.0 + +data_dir=data +train_all=data/train_all + +if [ $# -lt 1 ]; then + echo "Specify the location of the split files" + exit 1; +fi + +splitFile=$1 + +# Train first +for split in train dev test dev2 +do + + cp -r $train_all $data_dir/$split + + awk 'BEGIN {FS=" "}; FNR==NR { a[$1]; next } ((substr($2,0,length($2)-2) ".sph") in a)' \ + $splitFile/$split $train_all/segments > $data_dir/$split/segments + + n=`awk 'BEGIN {FS = " "}; {print substr($2,0,length($2)-2)}' $data_dir/$split/segments | sort | uniq | wc -l` + + echo "$n conversations left in split $split" + + utils/fix_data_dir.sh $data_dir/$split + utils/validate_data_dir.sh $data_dir/$split +done + diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/ctm.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/ctm.sh new file mode 100755 index 00000000000..7d09f574580 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/ctm.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# Copyright 2014 Gaurav Kumar. Apache 2.0 + +. ./cmd.sh + +split=test +data_dir=data/test +decode_dir=exp/sgmm2x_6a_mmi_b0.2/decode_test_fmllr_it4/ +lang_dir=data/lang + +# Create the STM file +# Always create this file before creating the CTM files so that +# channel numbers are properly created. +if [ ! 
-f $data_dir/stm ]; then + /export/a11/guoguo/babel/103-bengali-limitedLP.official/local/prepare_stm.pl $data_dir +fi + +# Create the CTM file +steps/get_ctm.sh $data_dir $lang_dir $decode_dir + +# Make sure that channel markers match +#sed -i "s:\s.*_fsp-([AB]): \1:g" data/dev/stm +#ls exp/tri5a/decode_dev/score_*/dev.ctm | xargs -I {} sed -i -r 's:fsp\s1\s:fsp A :g' {} +#ls exp/tri5a/decode_dev/score_*/dev.ctm | xargs -I {} sed -i -r 's:fsp\s2\s:fsp B :g' {} + +# Get the environment variables +. /export/babel/data/software/env.sh + +# Start scoring +/export/a11/guoguo/babel/103-bengali-limitedLP.official/local/score_stm.sh $data_dir $lang_dir \ + $decode_dir + +# Print a summary of the result +grep "Percent Total Error" $decode_dir/score_*/$split.ctm.dtl diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/decode_report.py b/egs/fisher_callhome_spanish/s5_gigaword/local/decode_report.py new file mode 100755 index 00000000000..6f3d3f80c95 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/decode_report.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python + +# Author : Gaurav Kumar (Johns Hopkins University) +# Gets a report on what the best word error rate was and which iteration +# led to it. This is needed both for reporting purposes and for setting +# the acoustic scale weight which extracting lattices. +# This script is specific to my partitions and needs to be made more general +# or modified + +from __future__ import print_function +import subprocess +import os + +decode_directories = ['exp/tri5a/decode_dev', + 'exp/tri5a/decode_test', + 'exp/tri5a/decode_dev2', + 'exp/sgmm2x_6a/decode_dev_fmllr', + 'exp/sgmm2x_6a/decode_test_fmllr', + 'exp/sgmm2x_6a/decode_dev2_fmllr', + 'exp/sgmm2x_6a_mmi_b0.2/decode_dev_it1', + 'exp/sgmm2x_6a_mmi_b0.2/decode_dev_it2', + 'exp/sgmm2x_6a_mmi_b0.2/decode_dev_it3', + 'exp/sgmm2x_6a_mmi_b0.2/decode_dev_it4', + 'exp/sgmm2x_6a_mmi_b0.2/decode_dev2_it1', + 'exp/sgmm2x_6a_mmi_b0.2/decode_dev2_it2', + 'exp/sgmm2x_6a_mmi_b0.2/decode_dev2_it3', + 'exp/sgmm2x_6a_mmi_b0.2/decode_dev2_it4', + 'exp/sgmm2x_6a_mmi_b0.2/decode_test_it1', + 'exp/sgmm2x_6a_mmi_b0.2/decode_test_it2', + 'exp/sgmm2x_6a_mmi_b0.2/decode_test_it3', + 'exp/sgmm2x_6a_mmi_b0.2/decode_test_it4', + 'exp/sgmm2x_6a_mmi_b0.2/decode_dev_fmllr_it1', + 'exp/sgmm2x_6a_mmi_b0.2/decode_dev_fmllr_it2', + 'exp/sgmm2x_6a_mmi_b0.2/decode_dev_fmllr_it3', + 'exp/sgmm2x_6a_mmi_b0.2/decode_dev_fmllr_it4', + 'exp/sgmm2x_6a_mmi_b0.2/decode_dev2_fmllr_it1', + 'exp/sgmm2x_6a_mmi_b0.2/decode_dev2_fmllr_it2', + 'exp/sgmm2x_6a_mmi_b0.2/decode_dev2_fmllr_it3', + 'exp/sgmm2x_6a_mmi_b0.2/decode_dev2_fmllr_it4', + 'exp/sgmm2x_6a_mmi_b0.2/decode_test_fmllr_it1', + 'exp/sgmm2x_6a_mmi_b0.2/decode_test_fmllr_it2', + 'exp/sgmm2x_6a_mmi_b0.2/decode_test_fmllr_it3', + 'exp/sgmm2x_6a_mmi_b0.2/decode_test_fmllr_it4' + ] + +def get_best_wer(decode_dir): + best_iteration = 0 + best_wer = 100.0 + for i in range(16): + if os.path.isfile("{}/wer_{}".format(decode_dir, i)): + result = subprocess.check_output("tail -n 3 {}/wer_{}".format(decode_dir, i), shell=True) + wer_string = result.split("\n")[0] + wer_details = wer_string.split(' ') + # Get max WER + wer = float(wer_details[1]) + if wer < best_wer: + best_wer = wer + best_iteration = i + return best_iteration, best_wer + +for decode_dir in decode_directories[:6]: + print(decode_dir) + print(get_best_wer(decode_dir)) + +# Separate processing for bMMI stuff +best_wer = 100.0 +best_dir = "" +best_iteration = 0 + +for decode_dir in decode_directories[6:10]: + iteration, wer 
= get_best_wer(decode_dir) + if wer < best_wer: + best_wer = wer + best_dir = decode_dir + best_iteration = iteration + +print(best_dir) +print((best_iteration, best_wer)) + +best_wer = 100.0 +best_dir = "" +best_iteration = 0 + +for decode_dir in decode_directories[10:14]: + iteration, wer = get_best_wer(decode_dir) + if wer < best_wer: + best_wer = wer + best_dir = decode_dir + best_iteration = iteration + +print(best_dir) +print((best_iteration, best_wer)) + +best_wer = 100.0 +best_dir = "" +best_iteration = 0 + +for decode_dir in decode_directories[14:18]: + iteration, wer = get_best_wer(decode_dir) + if wer < best_wer: + best_wer = wer + best_dir = decode_dir + best_iteration = iteration + +print(best_dir) +print((best_iteration, best_wer)) + +best_wer = 100.0 +best_dir = "" +best_iteration = 0 + +for decode_dir in decode_directories[18:22]: + iteration, wer = get_best_wer(decode_dir) + if wer <= best_wer: + best_wer = wer + best_dir = decode_dir + best_iteration = iteration + +print(best_dir) +print((best_iteration, best_wer)) + +best_wer = 100.0 +best_dir = "" +best_iteration = 0 + +for decode_dir in decode_directories[22:26]: + iteration, wer = get_best_wer(decode_dir) + if wer <= best_wer: + best_wer = wer + best_dir = decode_dir + best_iteration = iteration + +print(best_dir) +print((best_iteration, best_wer)) + +best_wer = 100.0 +best_dir = "" +best_iteration = 0 + +for decode_dir in decode_directories[26:]: + iteration, wer = get_best_wer(decode_dir) + if wer <= best_wer: + best_wer = wer + best_dir = decode_dir + best_iteration = iteration + +print(best_dir) +print((best_iteration, best_wer)) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/find_unique_phones.pl b/egs/fisher_callhome_spanish/s5_gigaword/local/find_unique_phones.pl new file mode 100755 index 00000000000..2da41182d20 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/find_unique_phones.pl @@ -0,0 +1,25 @@ +#!/usr/bin/env perl +#Finds unique phones from the basic rules file +# Copyright 2014 Gaurav Kumar. Apache 2.0 + +use utf8; + +($b)=$ARGV[0]; +($tmpdir)=$ARGV[1]; +open(BB, "<", "$b/basic_rules") || die "Can't open basic rules"; +binmode(BB, ":iso88591"); +open(O, ">$tmpdir/phones") || die "Can't open text file for writing"; +binmode(O, ":utf8"); +my %phones = qw(); +while () { + chomp; + my @stringComponents = split(/\t/); + m/->\s(\S+)/; + my $phone = $1; + $phone =~ tr/áéíóú/aeiou/; + $phones{$phone} = 1; +} +foreach my $p (keys %phones) { + print O $p, "\n"; +} +#print keys %phones; diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/fix_stm.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/fix_stm.sh new file mode 100755 index 00000000000..20220d107bc --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/fix_stm.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +# Fixes the CALLHOME stm files +# Copyright 2014 Gaurav Kumar. Apache 2.0 + +data_dir=$1 + +cat $data_dir/stm | awk '{$1=substr(tolower($1),0,length($1)-4);print;}' > $data_dir/stm_new +mv $data_dir/stm $data_dir/stm.bak +mv $data_dir/stm_new $data_dir/stm diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/flatten_all_gigaword.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/flatten_all_gigaword.sh new file mode 100755 index 00000000000..242359e7c28 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/flatten_all_gigaword.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +set -e + +# Path to Gigaword corpus with all data files decompressed. 
+export GIGAWORDDIR=$1 +# The directory to write output to +export OUTPUTDIR=$2 +# The number of jobs to run at once +export NUMJOBS=$3 + +echo "Flattening Gigaword with ${NUMJOBS} processes..." +mkdir -p $OUTPUTDIR +find ${GIGAWORDDIR}/data/*/* -type f -print -exec local/flatten_gigaword/run_flat.sh {} ${OUTPUTDIR} \; +echo "Combining the flattened files into one..." +cat ${OUTPUTDIR}/*.flat > ${OUTPUTDIR}/flattened_gigaword.txt diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/flatten_one_gigaword.py b/egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/flatten_one_gigaword.py new file mode 100644 index 00000000000..29f6766dd84 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/flatten_one_gigaword.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- + +import logging +import os +import re +import spacy +import gzip + +from argparse import ArgumentParser +from bs4 import BeautifulSoup + +en_nlp = spacy.load("es") + + +def flatten_one_gigaword_file(file_path): + f = gzip.open(file_path) + html = f.read() + # Parse the text with BeautifulSoup + soup = BeautifulSoup(html, "html.parser") + + # Iterate over all
<p>
items and get the text for each. + all_paragraphs = [] + for paragraph in soup("p"): + # Turn inter-paragraph newlines into spaces + paragraph = paragraph.get_text() + paragraph = re.sub(r"\n+", "\n", paragraph) + paragraph = paragraph.replace("\n", " ") + # Tokenize the paragraph into words + tokens = en_nlp.tokenizer(paragraph) + words = [str(token) for token in tokens if not + str(token).isspace()] + if len(words) < 3: + continue + all_paragraphs.append(words) + # Return a list of strings, where each string is a + # space-tokenized paragraph. + return [" ".join(paragraph) for paragraph in all_paragraphs] + + +if __name__ == "__main__": + log_fmt = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + logging.basicConfig(level=logging.INFO, format=log_fmt) + logger = logging.getLogger(__name__) + + parser = ArgumentParser(description=("Flatten a gigaword data file for " + "use in language modeling.")) + parser.add_argument("--gigaword-path", required=True, + metavar="", type=str, + help=("Path to Gigaword directory, with " + "all .gz files unzipped.")) + parser.add_argument("--output-dir", required=True, metavar="", + type=str, help=("Directory to write final flattened " + "Gigaword file.")) + + A = parser.parse_args() + all_paragraphs = flatten_one_gigaword_file(A.gigaword_path) + output_path = os.path.join(A.output_dir, + os.path.basename(A.gigaword_path) + ".flat") + with open(output_path, "w") as output_file: + for paragraph in all_paragraphs: + output_file.write("{}\n".format(paragraph)) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/run_flat.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/run_flat.sh new file mode 100755 index 00000000000..6b236be0ab9 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/run_flat.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +set -e + +. ./path_venv.sh + +# Path to Gigaword corpus with all data files decompressed. +GIGAWORDPATH=$1 +# The directory to write output to +OUTPUTDIR=$2 +file=$(basename ${GIGAWORDPATH}) +if [ ! -e ${OUTPUTDIR}/${file}.flat ]; then + echo "flattening to ${OUTPUTDIR}/${file}.flat" + python local/flatten_gigaword/flatten_one_gigaword.py --gigaword-path ${GIGAWORDPATH} --output-dir ${OUTPUTDIR} +else + echo "skipping ${file}.flat" +fi + diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_create_test_lang.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_create_test_lang.sh new file mode 100755 index 00000000000..fb765b57e69 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_create_test_lang.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# Copyright 2014 Gaurav Kumar. Apache 2.0 +# + +if [ -f path.sh ]; then . ./path.sh; fi + +mkdir -p data/lang_test + +arpa_lm=data/local/lm/3gram-mincount/lm_unpruned.gz +[ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1; + +mkdir -p data/lang_test +cp -r data/lang/* data/lang_test + +gunzip -c "$arpa_lm" | \ + arpa2fst --disambig-symbol=#0 \ + --read-symbol-table=data/lang_test/words.txt - data/lang_test/G.fst + + +echo "Checking how stochastic G is (the first of these numbers should be small):" +fstisstochastic data/lang_test/G.fst + +## Check lexicon. +## just have a look and make sure it seems sane. +echo "First few lines of lexicon FST:" +fstprint --isymbols=data/lang/phones.txt --osymbols=data/lang/words.txt data/lang/L.fst | head + +echo Performing further checks + +# Checking that G.fst is determinizable. +fstdeterminize data/lang_test/G.fst /dev/null || echo Error determinizing G. 
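+# (A failure in the check above usually points at a problem in the ARPA LM that
+# was converted into G.fst; something like "fstinfo data/lang_test/G.fst" is a
+# handy first step when debugging it.)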
+ +# Checking that L_disambig.fst is determinizable. +fstdeterminize data/lang_test/L_disambig.fst /dev/null || echo Error determinizing L. + +# Checking that disambiguated lexicon times G is determinizable +# Note: we do this with fstdeterminizestar not fstdeterminize, as +# fstdeterminize was taking forever (presumbaly relates to a bug +# in this version of OpenFst that makes determinization slow for +# some case). +fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst | \ + fstdeterminizestar >/dev/null || echo Error + +# Checking that LG is stochastic: +fsttablecompose data/lang/L_disambig.fst data/lang_test/G.fst | \ + fstisstochastic || echo "[log:] LG is not stochastic" + + +echo "$0 succeeded" diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_data_prep.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_data_prep.sh new file mode 100755 index 00000000000..11d65da3e95 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_data_prep.sh @@ -0,0 +1,175 @@ +#!/bin/bash +# +# Copyright 2014 Gaurav Kumar. Apache 2.0 +# The input is the Fisher Dataset which contains DISC1 and DISC2. (*.sph files) +# In addition the transcripts are needed as well. +# To be run from one directory above this script. + +# Note: when creating your own data preparation scripts, it's a good idea +# to make sure that the speaker id (if present) is a prefix of the utterance +# id, that the output scp file is sorted on utterance id, and that the +# transcription file is exactly the same length as the scp file and is also +# sorted on utterance id (missing transcriptions should be removed from the +# scp file using e.g. scripts/filter_scp.pl) + +stage=0 + +export LC_ALL=C + + +if [ $# -lt 2 ]; then + echo "Usage: $0 " + echo "e.g.: $0 /home/mpost/data/LDC/LDC2010S01 /home/mpost/data/LDC/LDC2010T04" + exit 1; +fi + +cdir=`pwd` +dir=`pwd`/data/local/data +lmdir=`pwd`/data/local/nist_lm +mkdir -p $dir $lmdir +local=`pwd`/local +utils=`pwd`/utils +tmpdir=`pwd`/data/local/tmp +mkdir -p $tmpdir + +. ./path.sh || exit 1; # Needed for KALDI_ROOT +export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin +sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe +if [ ! -x $sph2pipe ]; then + echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; + exit 1; +fi +cd $dir + +# Make directory of links to the WSJ disks such as 11-13.1. This relies on the command +# line arguments being absolute pathnames. +rm -r links/ 2>/dev/null +mkdir links/ +ln -s $* links + +# Basic spot checks to see if we got the data that we needed +if [ ! -d links/LDC2010S01 -o ! -d links/LDC2010T04 ]; +then + echo "The speech and the data directories need to be named LDC2010S01 and LDC2010T04 respecti +vely" + exit 1; +fi + +#if [ ! -d links/LDC2010S01/DISC1/data/speech -o ! -d links/LDC2010S01/DISC2/data/speech ]; +if [ ! -d links/LDC2010S01/data/speech ]; +then + echo "Speech directories missing or not properly organised within the speech data dir" + echo "Typical format is LDC2010S01/data/speech" + exit 1; +fi + +#Check the transcripts directories as well to see if they exist +if [ ! 
-d links/LDC2010T04/fisher_spa_tr/data/transcripts ]; +then + echo "Transcript directories missing or not properly organised" + echo "Typical format is LDC2010T04/fisher_spa_tr/data/transcripts" + exit 1; +fi + +#speech_d1=$dir/links/LDC2010S01/DISC1/data/speech +#speech_d2=$dir/links/LDC2010S01/DISC2/data/speech +speech=$dir/links/LDC2010S01/data/speech +transcripts=$dir/links/LDC2010T04/fisher_spa_tr/data/transcripts + +#fcount_d1=`find ${speech_d1} -iname '*.sph' | wc -l` +#fcount_d2=`find ${speech_d2} -iname '*.sph' | wc -l` +fcount_s=`find ${speech} -iname '*.sph' | wc -l` +fcount_t=`find ${transcripts} -iname '*.tdf' | wc -l` +#TODO:it seems like not all speech files have transcripts +#Now check if we got all the files that we needed +#if [ $fcount_d1 != 411 -o $fcount_d2 != 408 -o $fcount_t != 819 ]; +if [ $fcount_s != 819 -o $fcount_t != 819 ]; +then + echo "Incorrect number of files in the data directories" + echo "DISC1 and DISC2 should contain 411 and 408 .sph files respectively (Total = 819)" + echo "The transcripts should contain 819 files" + exit 1; +fi + +if [ $stage -le 0 ]; then + #Gather all the speech files together to create a file list + #TODO: Train and test split might be required + ( + #find $speech_d1 -iname '*.sph'; + #find $speech_d2 -iname '*.sph'; + find $speech -iname '*.sph'; + ) > $tmpdir/train_sph.flist + + #Get all the transcripts in one place + find $transcripts -iname '*.tdf' > $tmpdir/train_transcripts.flist +fi + +if [ $stage -le 1 ]; then + $local/fsp_make_trans.pl $tmpdir + mkdir -p $dir/train_all + mv $tmpdir/reco2file_and_channel $dir/train_all/ +fi + +if [ $stage -le 2 ]; then + sort $tmpdir/text.1 | grep -v '((' | \ + awk '{if (NF > 1){ print; }}' | \ + sed 's:<\s*[/]*\s*\s*for[ei][ei]g[nh]\s*\w*>::g' | \ + sed 's:\([^<]*\)<\/lname>:\1:g' | \ + sed 's:::g' | \ + sed 's:[^<]*<\/laugh>:[laughter]:g' | \ + sed 's:<\s*cough[\/]*>:[noise]:g' | \ + sed 's::[noise]:g' | \ + sed 's::[noise]:g' | \ + sed 's::[noise]:g' | \ + sed 's:[^<]*<\/background>:[noise]:g' | \ + sed -r 's:<[/]?background[/]?>:[noise]:g' | \ + #One more time to take care of nested stuff + sed 's:[^<]*<\/laugh>:[laughter]:g' | \ + sed -r 's:<[/]?laugh[/]?>:[laughter]:g' | \ + #now handle the exceptions, find a cleaner way to do this? + sed 's:::g' | \ + sed 's:::g' | \ + sed 's:foreign>::g' | \ + sed 's:>::g' | \ + #How do you handle numbers? + grep -v '()' | \ + #Now go after the non-printable characters and multiple spaces + sed -r 's:¿::g' | sed 's/^\s\s*|\s\s*$//g' | sed 's/\s\s*/ /g' > $tmpdir/text.2 + cp $tmpdir/text.2 $dir/train_all/text + + #Create segments file and utt2spk file + ! 
cat $dir/train_all/text | perl -ane 'm:([^-]+)-([AB])-(\S+): || die "Bad line $_;"; print "$1-$2-$3 $1-$2\n"; ' > $dir/train_all/utt2spk \ + && echo "Error producing utt2spk file" && exit 1; + + cat $dir/train_all/text | perl -ane 'm:((\S+-[AB])-(\d+)-(\d+))\s: || die; $utt = $1; $reco = $2; + $s = sprintf("%.2f", 0.01*$3); $e = sprintf("%.2f", 0.01*$4); if ($s != $e) {print "$utt $reco $s $e\n"}; ' >$dir/train_all/segments + + $utils/utt2spk_to_spk2utt.pl <$dir/train_all/utt2spk > $dir/train_all/spk2utt +fi + +if [ $stage -le 3 ]; then + for f in `cat $tmpdir/train_sph.flist`; do + # convert to absolute path + make_absolute.sh $f + done > $tmpdir/train_sph_abs.flist + + cat $tmpdir/train_sph_abs.flist | perl -ane 'm:/([^/]+)\.sph$: || die "bad line $_; "; print "$1 $_"; ' > $tmpdir/sph.scp + cat $tmpdir/sph.scp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2);}' | \ + sort -k1,1 -u > $dir/train_all/wav.scp || exit 1; +fi + +if [ $stage -le 4 ]; then + # Build the speaker to gender map, the temporary file with the speaker in gender information is already created by fsp_make_trans.pl. + cd $cdir + $local/fsp_make_spk2gender.sh > $dir/train_all/spk2gender +fi + +fix_data_dir.sh $dir/train_all || exit 1 +validate_data_dir.sh --no-feats $dir/train_all || exit 1 + +echo "Fisher Spanish Data preparation succeeded." + +exit 0; diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_ideal_data_partitions.pl b/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_ideal_data_partitions.pl new file mode 100755 index 00000000000..538bca58981 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_ideal_data_partitions.pl @@ -0,0 +1,85 @@ +#!/usr/bin/env perl +# +# Johns Hopkins University (Author : Gaurav Kumar) +# +# This script should be run from one directory above the current one +# +# Rough partitions that are needed are : +# +# ASR Train : 120k utterances +# ASR tune : 20k utterances +# ASR eval : 20k utterances +# MT train : 105k utterances +# MT tune : Same as the ASR eval (20k utterances) +# MT eval : 20k utterances +# +# This script tries to find the closest possible matches so that conversations +# belong in one single partition and hence there is no speaker/conversation +# overlap between data partitions + +use Storable 'dclone'; + +$textfile="data/local/data/train_all/text"; +$tmp="data/local/tmp"; + +open(T, "<", "$textfile") || die "Can't open text file"; + +$ongoingConv = ""; +%tmpSplits = (); +@splitNumbers = (17455, 20000, 100000, 20000, 100000); +$splitId = 0; +%splits = (); + +while () { + @myStringComponents = split(/\s/); + @uttid = split('-', $myStringComponents[0]); + $currentConv = $uttid[0]; + if ($currentConv eq $ongoingConv) { + # Same conversation, add to current hash + #print "Same conversation"; + $tmpSplits{$ongoingConv} += 1; + } + else { + # New conversation intiated, first check if there are enough entries + # in the hash + #print $ongoingConv . " " . get_entries_hash(\%tmpSplits) . "\n"; + if (get_entries_hash(\%tmpSplits) > $splitNumbers[$splitId]) { + print "Finished processing split " . $splitId . ". It contains " . get_entries_hash(\%tmpSplits) . " entries. 
\n"; + #$splits{$splitId} = keys %tmpSplits; + @newArr = keys %tmpSplits; + $splits{$splitId} = dclone(\@newArr); + %tmpSplits = (); + $splitId += 1; + } + $ongoingConv = $currentConv; + $tmpSplits{$ongoingConv} = 1; + } +} +# Put final tmpsplits in the right partition +@newArr = keys %tmpSplits; +$splits{$splitId} = dclone(\@newArr); +foreach (keys %splits) { + #print $_ , " ", $splits{$_}, "\n"; +} +print "Finished processing split " . $splitId . ". It contains " . get_entries_hash(\%tmpSplits) . " entries. \n"; + +# Write splits to file +foreach my $key ( keys %splits ) { + open(S, ">$tmp/split-$key") || die "Can't open splitfile to write"; + foreach my $file ( @{$splits{$key}} ) { + print $file, "\n"; + print S "$file\n" || die "Error writing to file"; + } + close(S); +} + +sub get_entries_hash() { + my $inputHashRef = shift; + $total = 0; + foreach (keys %{$inputHashRef}) + { + $total += $inputHashRef->{$_}; + } + return $total; +} + diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_make_spk2gender.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_make_spk2gender.sh new file mode 100755 index 00000000000..15b1c0064cf --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_make_spk2gender.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env python + +# Copyright 2014 Gaurav Kumar. Apache 2.0 +# Gets the unique speakers from the file created by fsp_make_trans.pl +# Note that if a speaker appears multiple times, it is categorized as female + +import os +import sys + +tmpFileLocation = 'data/local/tmp/spk2gendertmp' + +tmpFile = None + +try: + tmpFile = open(tmpFileLocation) +except IOError: + print 'The file spk2gendertmp does not exist. Run fsp_make_trans.pl first?' + +speakers = {} + +for line in tmpFile: + comp = line.split(' ') + if comp[0] in speakers: + speakers[comp[0]] = "f" + else: + speakers[comp[0]] = comp[1] + +for speaker, gender in speakers.iteritems(): + print speaker + " " + gender diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_make_trans.pl b/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_make_trans.pl new file mode 100755 index 00000000000..8c3f74e3917 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_make_trans.pl @@ -0,0 +1,81 @@ +#!/usr/bin/env perl +# Copyright 2014 Gaurav Kumar. 
Apache 2.0 + +use utf8; +use File::Basename; +($tmpdir)=@ARGV; +#$tmpdir='../data/local/tmp'; +$trans="$tmpdir/train_transcripts.flist"; +$reco="$tmpdir/reco2file_and_channel"; +open(T, "<", "$trans") || die "Can't open transcripts file"; +open(R, "|sort >$reco") || die "Can't open reco2file_and_channel file $!"; +open(O, ">$tmpdir/text.1") || die "Can't open text file for writing"; +open(G, ">$tmpdir/spk2gendertmp") || die "Can't open the speaker to gender map file"; +binmode(O, ":utf8"); +while () { + $file = $_; + m:([^/]+)\.tdf: || die "Bad filename $_"; + $call_id = $1; + print R "$call_id-A $call_id A\n"; + print R "$call_id-B $call_id B\n"; + open(I, "<$file") || die "Opening file $_"; + binmode(I, ":utf8"); + # Get rid of header sections first + foreach ( 0..2 ) { + $tmpLine = ; + } + #Now read each line and extract information + while () { + #20051017_215732_274_fsp.sph 1 0.0 0.909856781803 Audrey female native Audrey 0 0 -1 + chomp; + my @stringComponents = split(/\t/); + + #Check number of components in this array + if ((scalar @stringComponents) >= 11) { + $start = sprintf("%06d", $stringComponents[2] * 100); + $end = sprintf("%06d", $stringComponents[3] * 100); + length($end) > 6 && die "Time too long $end in $file"; + $side = $stringComponents[1] ? "B" : "A"; + $words = $stringComponents[7]; + $utt_id = "${call_id}-$side-$start-$end"; + $speaker_id = "${call_id}-$side"; + $gender = "m"; + if ($stringComponents[5] == "female") { + $gender = "f"; + } + print G "$speaker_id $gender\n" || die "Error writing to speaker2gender file"; + $words =~ s:/rarrow/g; + $words =~ s/[[:punct:]]//g; + $words =~ s/larrow//g; + $words =~ s:lendarrow: 0){ print; }}' > $tmpdir/uniquewords + if [ ! -f "${tmpdir}/es_wordlist.json" ]; then + echo "Could not find the large collection of Spanish words es_wordlist.json" + echo "Trying to download it via wget" + + if ! which wget >&/dev/null; then + echo "This script requires you to first install wget" + exit 1; + fi + + cwd=`pwd` + cd $tmpdir + wget -T 10 -t 3 -c http://www.openslr.org/resources/21/es_wordlist.json.tgz + + if [ ! -e ${tmpdir}/es_wordlist.json.tgz ]; then + echo "Download of the large Spanish word list failed" + exit 1; + fi + + tar -xovzf es_wordlist.json.tgz || exit 1; + cd $cwd + fi + + # Merge with gigaword corpus + $local/merge_lexicons.py ${tmpdir} ${lexicon} + mv $tmpdir/uniquewords $tmpdir/uniquewords.small + mv $tmpdir/uniquewords64k $tmpdir/uniquewords +fi + +#Then get the list of phones form basic_rules in the lexicon folder +if [ $stage -le 1 ]; then + if [ ! 
-d "$lexicon/callhome_spanish_lexicon_970908" ]; then + echo "Could not find folder callhome_spanish_lexicon_970908 in the lexicon folder" + exit 1; + fi + + # This is a preliminary attempt to get the unique phones from the LDC lexicon + # This will be extended based on our lexicon later + perl $local/find_unique_phones.pl $lexicon/callhome_spanish_lexicon_970908 $tmpdir + +fi + +#Get pronunciation for each word using the spron.pl file in the lexicon folder +if [ $stage -le 2 ]; then + #cd $lexicon/callhome_spanish_lexicon_970908 + # Replace all words for which no pronunciation was generated with an orthographic + # representation + cat $tmpdir/uniquewords | $local/spron.pl $lexicon/callhome_spanish_lexicon_970908/preferences $lexicon/callhome_spanish_lexicon_970908/basic_rules \ + | cut -f1 | sed -r 's:#\S+\s\S+\s\S+\s\S+\s(\S+):\1:g' \ + | awk -F '[/][/]' '{print $1}' \ + > $tmpdir/lexicon_raw +fi + +#Break the pronunciation down according to the format required by Kaldi +if [ $stage -le 3 ]; then + # Creates a KALDI compatible lexicon, and extends the phone list + perl $local/isolate_phones.pl $tmpdir + cat $tmpdir/phones_extended | sort | awk '{if ($1 != "") {print;}}' > $tmpdir/phones_extended.1 + mv $tmpdir/phones $tmpdir/phones.small + mv $tmpdir/phones_extended.1 $tmpdir/phones + sort $tmpdir/phones -o $tmpdir/phones + paste -d ' ' $tmpdir/uniquewords $tmpdir/lexicon_one_column | sed -r 's:(\S+)\s#.*:\1 oov:g' > $tmpdir/lexicon.1 + #paste -d ' ' $tmpdir/uniquewords $tmpdir/lexicon_one_column | grep -v '#' > $tmpdir/lexicon.1 +fi + +if [ $stage -le 4 ]; then + # silence phones, one per line. + for w in sil laughter noise oov; do echo $w; done > $dir/silence_phones.txt + echo sil > $dir/optional_silence.txt + + # An extra question will be added by including the silence phones in one class. 
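+  # (The awk one-liner below just joins those phones onto a single line, so
+  # extra_questions.txt should end up containing "sil laughter noise oov".)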
+ cat $dir/silence_phones.txt| awk '{printf("%s ", $1);} END{printf "\n";}' > \ + $dir/extra_questions.txt || exit 1; + + # Remove [] chars from phones + cat $tmpdir/phones | awk '{if ($1 != "_" && $1 != "[" && $1 != "]") {print;}}' > $tmpdir/phones.1 + rm $tmpdir/phones + mv $tmpdir/phones.1 $tmpdir/phones + cp $tmpdir/phones $dir/nonsilence_phones.txt + + if [ -f $tmpdir/lexicon.2 ]; then rm $tmpdir/lexicon.2; fi + cp "$tmpdir/lexicon.1" "$tmpdir/lexicon.2" + + # Add prons for laughter, noise, oov + for w in `grep -v sil $dir/silence_phones.txt`; do + sed -i "/\[$w\]/d" $tmpdir/lexicon.2 + done + + for w in `grep -v sil $dir/silence_phones.txt`; do + echo "[$w] $w" + done | cat - $tmpdir/lexicon.2 > $tmpdir/lexicon.3 || exit 1; + + cat $tmpdir/lexicon.3 \ + <( echo "mm m" + echo " oov" ) > $tmpdir/lexicon.4 + + # From the lexicon remove _ from the phonetic representation + cat $tmpdir/lexicon.4 | sed 's:\s_::g' > $tmpdir/lexicon.5 + + cp "$tmpdir/lexicon.5" $dir/lexicon.txt + + cat $datadir/text | \ + awk '{for (n=2;n<=NF;n++){ count[$n]++; } } END { for(n in count) { print count[n], n; }}' | \ + sort -nr > $tmpdir/word_counts + + awk '{print $1}' $dir/lexicon.txt | \ + perl -e '($word_counts)=@ARGV; + open(W, "<$word_counts")||die "opening word-counts $word_counts"; + while() { chop; $seen{$_}=1; } + while() { + ($c,$w) = split; + if (!defined $seen{$w}) { print; } + } ' $tmpdir/word_counts > $tmpdir/oov_counts.txt + echo "*Highest-count OOVs are:" + head -n 20 $tmpdir/oov_counts.txt +fi + +$utils/validate_dict_dir.pl $dir +exit 0; diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_train_lms.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_train_lms.sh new file mode 100755 index 00000000000..cebf3b222ab --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_train_lms.sh @@ -0,0 +1,140 @@ +#!/bin/bash +# Copyright 2014 Gaurav Kumar. Apache 2.0 + +# To be run from one level above this directory +# Generate the text for the LM training +tmp_dir=data/local/tmp +train_all=data/local/data/train_all + +if [ $# -lt 1 ]; then + echo "Specify the location of the split files" + exit 1; +fi + +splitFile=$1 +split=train +# Train only +if [ -d $tmp_dir/$split ]; then + rm -r $tmp_dir/$split +fi +cp -r $train_all $tmp_dir/$split + +awk 'BEGIN {FS=" "}; FNR==NR { a[$1]; next } ((substr($2,0,length($2)-2) ".sph") in a)' \ +$splitFile/$split $train_all/segments > $tmp_dir/$split/segments + +n=`awk 'BEGIN {FS = " "}; {print substr($2,0,length($2)-2)}' $tmp_dir/$split/segments | sort | uniq | wc -l` + +echo "$n conversations left in split $split" + +utils/fix_data_dir.sh $tmp_dir/$split +# There is no feature file yet, use --no-feats switch +utils/validate_data_dir.sh --no-feats $tmp_dir/$split + +# Now use this training text + +text=$tmp_dir/train/text +lexicon=data/local/dict/lexicon.txt + +for f in "$text" "$lexicon"; do + [ ! -f $x ] && echo "$0: No such file $f" && exit 1; +done + +# This script takes no arguments. It assumes you have already run +# fisher_data_prep.sh and fisher_prepare_dict.sh +# It takes as input the files +#data/train_all/text +#data/local/dict/lexicon.txt + +dir=`pwd`/data/local/lm +mkdir -p $dir +export LC_ALL=C # You'll get errors about things being not sorted, if you +# have a different locale. +export PATH=$PATH:`pwd`/../../../tools/kaldi_lm +( # First make sure the kaldi_lm toolkit is installed. + cd ../../../tools || exit 1; + if [ -d kaldi_lm ]; then + echo Not installing the kaldi_lm toolkit since it is already there. 
+ else + echo Downloading and installing the kaldi_lm tools + if [ ! -f kaldi_lm.tar.gz ]; then + wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1; + fi + tar -xvzf kaldi_lm.tar.gz || exit 1; + cd kaldi_lm + make || exit 1; + echo Done making the kaldi_lm tools + fi +) || exit 1; + +mkdir -p $dir + + +cleantext=$dir/text.no_oov + +cat $text | awk -v lex=$lexicon 'BEGIN{while((getline0){ seen[$1]=1; } } + {for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf(" ");} } printf("\n");}' \ + > $cleantext || exit 1; + + +cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \ + sort -nr > $dir/word.counts || exit 1; + + +# Get counts from acoustic training transcripts, and add one-count +# for each word in the lexicon (but not silence, we don't want it +# in the LM-- we'll add it optionally later). +cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \ + cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \ + sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1; + +# note: we probably won't really make use of as there aren't any OOVs +cat $dir/unigram.counts | awk '{print $2}' | get_word_map.pl "" "" "" > $dir/word_map \ + || exit 1; + +# note: ignore 1st field of train.txt, it's the utterance-id. +cat $cleantext | awk -v wmap=$dir/word_map 'BEGIN{while((getline0)map[$1]=$2;} + { for(n=2;n<=NF;n++) { printf map[$n]; if(n$dir/train.gz \ + || exit 1; + +train_lm.sh --arpa --lmtype 3gram-mincount $dir || exit 1; + +# Perplexity over 88307.000000 words (excluding 691.000000 OOVs) is 71.241332 + +# note: output is +# data/local/lm/3gram-mincount/lm_unpruned.gz + + +exit 0 + +echo "Baseline" + +# From here is some commands to do a baseline with SRILM (assuming +# you have it installed). +heldout_sent=158126 # Don't change this if you want result to be comparable with + # kaldi_lm results +sdir=$dir/srilm # in case we want to use SRILM to double-check perplexities. +mkdir -p $sdir +cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n $sdir/heldout +cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n $sdir/train + +cat $dir/word_map | awk '{print $1}' | cat - <(echo ""; echo "" ) > $sdir/wordlist + + +ngram-count -text $sdir/train -order 3 -limit-vocab -vocab $sdir/wordlist -unk \ + -map-unk "" -kndiscount -interpolate -lm $sdir/srilm.o3g.kn.gz +ngram -lm $sdir/srilm.o3g.kn.gz -ppl $sdir/heldout + +# data/local/lm/srilm/srilm.o3g.kn.gz: line 71: warning: non-zero probability for in closed-vocabulary LM +# file data/local/lm/srilm/heldout: 10000 sentences, 78998 words, 0 OOVs +# 0 zeroprobs, logprob= -165170 ppl= 71.7609 ppl1= 123.258 + + +# Note: perplexity SRILM gives to Kaldi-LM model is similar to what kaldi-lm reports above. +# Difference in WSJ must have been due to different treatment of . +ngram -lm $dir/3gram-mincount/lm_unpruned.gz -ppl $sdir/heldout + +# data/local/lm/srilm/srilm.o3g.kn.gz: line 71: warning: non-zero probability for in closed-vocabulary LM +# file data/local/lm/srilm/heldout: 10000 sentences, 78998 words, 0 OOVs +# 0 zeroprobs, logprob= -164990 ppl= 71.4278 ppl1= 122.614 diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/get_1_best.py b/egs/fisher_callhome_spanish/s5_gigaword/local/get_1_best.py new file mode 100755 index 00000000000..9c590635562 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/get_1_best.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python +# Copyright 2014 Gaurav Kumar. 
Apache 2.0 + +# Extracts one best output for a set of files +# The list of files in the conversations for which 1 best output has to be extracted +# words.txt + +import os +import sys + +scoringFile = "exp/sgmm2x_6a_mmi_b0.2/decode_test_it4/scoring/10.tra" +wordsFile = open('exp/sgmm2x_6a/graph/words.txt') +conversationList = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/test') +oneBestTmp = 'exp/sgmm2x_6a_mmi_b0.2/one-best/asr-test' +provFile = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/asr.test', 'w+') +timLocation = '/export/a04/gkumar/corpora/fishcall/fisher/tim' + +def findTranscription(timeDetail): + file1 = open(scoringFile) + for line in file1: + lineComp = line.split() + if lineComp[0] == timeDetail: + return " ".join(lineComp[1:]) + # No result found + return -1 + +words = {} + +# Extract word list +for line in wordsFile: + lineComp = line.split() + words[int(lineComp[1])] = lineComp[0].strip() + +# Now read list of files in conversations +fileList = [] +for line in conversationList: + line = line.strip() + line = line[:-4] + fileList.append(line) + +# Now get timing information to concatenate the ASR outputs +if not os.path.exists(oneBestTmp): + os.makedirs(oneBestTmp) + +for item in fileList: + timingFile = open(timLocation + '/' + item + '.es') + newFile = open(oneBestTmp + '/' + item + '.es', 'w+') + for line in timingFile: + timeInfo = line.split() + mergedTranslation = "" + for timeDetail in timeInfo: + #Locate this in ASR dev/test, this is going to be very slow + tmp = findTranscription(timeDetail) + if tmp != -1: + mergedTranslation = mergedTranslation + " " + tmp + mergedTranslation = mergedTranslation.strip() + transWords = [words[int(x)] for x in mergedTranslation.split()] + newFile.write(" ".join(transWords) + "\n") + provFile.write(" ".join(transWords) + "\n") + + newFile.close() +provFile.close() diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/get_data_weights.pl b/egs/fisher_callhome_spanish/s5_gigaword/local/get_data_weights.pl new file mode 100755 index 00000000000..ca5b2a46f8e --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/get_data_weights.pl @@ -0,0 +1,39 @@ +#!/usr/bin/env perl + +# Nagendra Kumar Goel + +# This takes two arguments: +# 1) Pocolm training output folder +# 2) rnnlm weights file name (for output) + +use POSIX; +use List::Util qw[min max]; + +if (@ARGV != 2) { + die "Usage: get_data_weights.pl \n"; +} + +$pdir = shift @ARGV; +$out = shift @ARGV; + +open(P, "<$pdir/metaparameters") || die "Could not open $pdir/metaparameters"; +open(N, "<$pdir/names") || die "Could not open $pdir/names" ; +open(O, ">$out") || die "Could not open $out for writing" ; + +my %scores = (); + +while() { + @n = split(/\s/,$_); + $name = $n[1]; + $w =
<P>
; + @w = split(/\s/,$w); + $weight = $w[1]; + $scores{$name} = $weight; +} + +$min = min(values %scores); + +for(keys %scores) { + $weightout = POSIX::ceil($scores{$_} / $min); + print O "$_\t1\t$weightout\n"; +} diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/get_lattices.py b/egs/fisher_callhome_spanish/s5_gigaword/local/get_lattices.py new file mode 100755 index 00000000000..5430c18bb5b --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/get_lattices.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python +# Copyright 2014 Gaurav Kumar. Apache 2.0 + +# Extracts one best output for a set of files +# The list of files in the conversations for which 1 best output has to be extracted +# words.txt + +from __future__ import print_function +import os +import sys +import subprocess + +latticeLocation = 'latjosh-bmmi/lattices-pushed/' + +tmpdir = 'data/local/data/tmp/bmmi-t/lattmp' +invalidplfdir = 'data/local/data/tmp/bmmi-t/invalidplf' +symtable = '/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-matt/data/lang/words.clean.txt' + +conversationList = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/test') +provFile = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/bmmi-t/asr.test.plf', 'w+') +invalidPLF = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/bmmi-t/invalidPLF', 'w+') +blankPLF = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/bmmi-t/blankPLF', 'w+') +rmLines = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/bmmi-t/removeLines', 'w+') + +if not os.path.exists(tmpdir): + os.makedirs(tmpdir) +if not os.path.exists(invalidplfdir): + os.makedirs(invalidplfdir) +else: + os.system("rm " + invalidplfdir + "/*") + +def latticeConcatenate(lat1, lat2): + ''' + Concatenates lattices, writes temporary results to tmpdir + ''' + if lat1 == "": + os.system('rm ' + tmpdir + '/tmp.lat') + return lat2 + else: + proc = subprocess.Popen(['fstconcat', lat1, lat2, (tmpdir + '/tmp.lat')]) + proc.wait() + return tmpdir + '/tmp.lat' + + +def findLattice(timeDetail): + ''' + Finds the lattice corresponding to a time segment + ''' + if os.path.isfile(latticeLocation + timeDetail + '.lat'): + return latticeLocation + timeDetail + '.lat' + else: + return -1 + + +# Now read list of files in conversations +fileList = [] +for line in conversationList: + line = line.strip() + line = line[:-4] + fileList.append(line) + +# IN what order were the conversations added to the spanish files? 
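+# (Roughly, for every line of the timing files the loop below concatenates the
+# per-utterance lattices with fstconcat via latticeConcatenate(), removes
+# epsilons and topologically sorts the result, converts it to PLF with the
+# external fsm2plf.sh helper, and keeps only the outputs that checkplf accepts;
+# invalid or empty cases are recorded in invalidPLF, blankPLF and removeLines
+# so they can be dealt with afterwards.)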
+# Now get timing information to concatenate the ASR outputs + +lineNo = 1 +for item in fileList: + timingFile = open('/export/a04/gkumar/corpora/fishcall/fisher/tim/' + item + '.es') + for line in timingFile: + timeInfo = line.split() + + # For utterances that are concatenated in the translation file, + # the corresponding FSTs have to be translated as well + mergedTranslation = "" + for timeDetail in timeInfo: + tmp = findLattice(timeDetail) + if tmp != -1: + # Concatenate lattices + mergedTranslation = latticeConcatenate(mergedTranslation, tmp) + + print(mergedTranslation) + if mergedTranslation != "": + + # Sanjeev's Recipe : Remove epsilons and topo sort + finalFST = tmpdir + "/final.fst" + os.system("fstrmepsilon " + mergedTranslation + " | fsttopsort - " + finalFST) + + # Now convert to PLF + proc = subprocess.Popen('/export/a04/gkumar/corpora/fishcall/bin/fsm2plf.sh ' + symtable + ' ' + finalFST, stdout=subprocess.PIPE, shell=True) + PLFline = proc.stdout.readline() + finalPLFFile = tmpdir + "/final.plf" + finalPLF = open(finalPLFFile, "w+") + finalPLF.write(PLFline) + finalPLF.close() + + # now check if this is a valid PLF, if not write it's ID in a + # file so it can be checked later + proc = subprocess.Popen("/export/a04/gkumar/moses/mosesdecoder/checkplf < " + finalPLFFile + " 2>&1 | awk 'FNR == 2 {print}'", stdout=subprocess.PIPE, shell=True) + line = proc.stdout.readline() + print("{} {}".format(line, lineNo)) + if line.strip() != "PLF format appears to be correct.": + os.system("cp " + finalFST + " " + invalidplfdir + "/" + timeInfo[0]) + invalidPLF.write(invalidplfdir + "/" + timeInfo[0] + "\n") + rmLines.write("{}\n".format(lineNo)) + else: + provFile.write(PLFline) + else: + blankPLF.write(timeInfo[0] + "\n") + rmLines.write("{}\n".format(lineNo)) + # Now convert to PLF + lineNo += 1 + +provFile.close() +invalidPLF.close() +blankPLF.close() +rmLines.close() diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/get_oracle.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/get_oracle.sh new file mode 100755 index 00000000000..451a7c529fb --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/get_oracle.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash + +# Gets lattice oracles +# Copyright 2014 Gaurav Kumar. Apache 2.0 + +if [ $# -lt 3 ]; then + echo "Specify lattice dir, symbol table and text file for partition" + exit 1; +fi + +latticeDir=$1 +textFile=$3 +symTable=$2 +oracleDir=$latticeDir/oracle + +echo $latticeDir +echo $oracleDir + +. ./path.sh + +if [ ! -f $textFile -o ! -f $symTable -o ! -d $latticeDir ]; then + echo "Required files not found" + exit 1; +fi + +mkdir -p $oracleDir + +cat $textFile | sed 's:\[laughter\]::g' | sed 's:\[noise\]::g' | \ + utils/sym2int.pl --map-oov [oov] -f 2- $symTable | \ + $KALDI_ROOT/src/latbin/lattice-oracle --word-symbol-table=$symTable "ark:gunzip -c $latticeDir/lat.*.gz|" ark:- ark,t:$oracleDir/oracle.tra 2>$oracleDir/oracle.log + +sort -k1,1 -u $oracleDir/oracle.tra -o $oracleDir/oracle.tra diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/isolate_phones.pl b/egs/fisher_callhome_spanish/s5_gigaword/local/isolate_phones.pl new file mode 100755 index 00000000000..0366dcdacb0 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/isolate_phones.pl @@ -0,0 +1,66 @@ +#!/usr/bin/env perl +# Copyright 2014 Gaurav Kumar. 
Apache 2.0 +# Once the phonetic representation for words is generated by the LDC lexicon +# This script converts them into a KALDI compatible format +# In addition, it extends the list of phonemes to consider based on +# orthograhic representations of those words which do not have stressed vowels + +use utf8; + +($tmpdir)=$ARGV[0]; +open(L, "<", "$tmpdir/lexicon_raw") || die "Can't open raw lexicon"; +open(P, "<" , "$tmpdir/phones") || die "Can't open phone file"; +open(I, ">$tmpdir/lexicon_one_column") || die "Can't open text file for writing"; +open(E, ">$tmpdir/phones_extended") || die "Can't open ex-phone file for writing"; +binmode(P, ":utf8"); +binmode(L, ":utf8"); +binmode(I, ":utf8"); +binmode(E, ":utf8"); + +#Get all phones +my %phones = qw(); +while (
<P>
) { + chomp; + $phones{$_} = 1; +} + +print @phones; + +while () { + if (substr($_, 0, 1) eq "#") { + print I $_; + next; + } + $len = length; + $current = 0; + $splitWord = ""; + while ($current < $len) { + #First check for two char codes + $currentChar2 = substr($_, $current, 2); + $currentChar1 = substr($_, $current, 1); + if (exists($phones{$currentChar2})) { + $splitWord = $splitWord . " " . $currentChar2; + $current = $current + 2; + } + else { + # Check if this phone exists + if (!exists($phones{$currentChar1})) { + $phones{$currentChar1} = 1 + } + $splitWord = $splitWord . " " . $currentChar1; + $current = $current + 1; + } + } + $splitWord =~ s/^\s*(.*?)\s*$/$1/; + print I $splitWord, "\n"; +} + +# Now write the phones to the extended phone file +foreach my $key (keys %phones) { + print E $key, "\n"; +} + +close(L); +close(P); +close(I); +close(E); diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/latconvert.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/latconvert.sh new file mode 100755 index 00000000000..bbe0af5810c --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/latconvert.sh @@ -0,0 +1,124 @@ +#!/usr/bin/env bash +# Author : Gaurav Kumar, Johns Hopkins University +# Creates OpenFST lattices from Kaldi lattices +# This script needs to be run from one level above this directory + +. ./path.sh + +if [ $# -lt 3 ]; then + echo "Enter the latdir (where the lattices will be put), the decode dir containing lattices and the acoustic scale" + exit 1 +fi + +prunebeam=2 + +latdir=$1 +decode_dir=$2 +acoustic_scale=$3 +#latdir="latjosh-2-callhome" +#decode_dir=exp/tri5a/decode_$partition +#acoustic_scale=0.077 + +stage=0 + +if [ -d $decode_dir ] +then + # TODO:Add scaling factor for weights, how? + rawLatDir="lattices" + compiledLatDir="lattices-bin" + preplfLatDir="lattices-pushed" + + mkdir -p $latdir + mkdir -p $latdir/$rawLatDir + mkdir -p $latdir/$compiledLatDir + mkdir -p $latdir/$preplfLatDir + + for l in $decode_dir/lat.*.gz + do + ( + # Extract file name and unzip the file first + bname=${l##*/} + bname="$latdir/${bname%.gz}" + gunzip -c $l > "$bname.bin" + + if [ $stage -le 0 ]; then + + # Now copy into ark format + $KALDI_ROOT/src/latbin/lattice-copy ark:$bname.bin ark,t:- > "$bname.raw" + + # Prune lattices + $KALDI_ROOT/src/latbin/lattice-prune --acoustic-scale=$acoustic_scale --beam=$prunebeam ark:"$bname.raw" ark:"$bname.pruned" + + # Convert to an openfst compatible format + $KALDI_ROOT/src/latbin/lattice-to-fst --lm-scale=1.0 --acoustic-scale=$acoustic_scale ark:$bname.pruned ark,t:$bname.ark.fst + + fi + + if [ $stage -le 1 ]; then + fileName="" + fileLine=0 + + while read line; do + if [ $fileLine = 0 ]; then + fileName="$line" + fileLine=1 + continue + fi + if [ -z "$line" ]; then + fileLine=0 + continue + fi + # Replace laugh, unk, oov, noise with eps + echo "$line" | awk '{if ($3 == 2038 || $3 == 2039 || $3 == 2040) {$3 = 0; $4 = 0} print}' >> "$latdir/$rawLatDir/$fileName.lat" + done < $bname.ark.fst + echo "Done isolating lattices" + fi + ) & + done + wait + rm $latdir/*.bin + rm $latdir/*.pruned + + + if [ $stage -le 2 ]; then + #Compile lattices + for l in $latdir/$rawLatDir/*.lat + do + ( + # Arc type needs to be log + bname=${l##*/} + fstcompile --arc_type=log $latdir/$rawLatDir/$bname $latdir/$compiledLatDir/$bname + ) & + done + wait + echo "Done compiling lattices." 
+ fi + + if [ $stage -le 3 ]; then + #Sanjeev's Recipe for creating valid PLF compatible FSTs" + # Create a dummy FST with one state and no arcs first + echo 0 | fstcompile --arc_type=log - $latdir/$preplfLatDir/dummy.fst + # Push Lattice weights towards initial state + for l in $latdir/$compiledLatDir/*.lat + do + ( + bname=${l##*/} + fstrmepsilon $latdir/$compiledLatDir/$bname | \ + fstpush --push_weights --remove_total_weight - | \ + # Do not topo sort here, do it before converting into PLF + # Sanjeev's Recipe : Concatenate with dummy FST + fstconcat - $latdir/$preplfLatDir/dummy.fst | \ + fstreverse - | \ + fstrmepsilon - | \ + fstreverse - $latdir/$preplfLatDir/$bname + ) & + done + wait + # Let's take a moment to thank the dummy FST for playing its + # part in this process. However, it has to go now. + rm $latdir/$preplfLatDir/dummy.fst + echo "Done performing fst push (initial state)" + fi +else + echo "Complete training and decoding first" +fi diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/merge_lexicons.py b/egs/fisher_callhome_spanish/s5_gigaword/local/merge_lexicons.py new file mode 100755 index 00000000000..94546dc44c3 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/merge_lexicons.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# 2018 Saikiran Valluri, GoVivace inc., Avaaya + +# Merges unique words from Spanish Fisher, Gigaword and the LDC spanish lexicon +from __future__ import print_function +import sys +import re +import json +import codecs +import operator + +wordlimit = 64000 +tmpdir = sys.argv[1] +ldc_lexicon = sys.argv[2] +uw_fisher = tmpdir + "/uniquewords" +uw_gigaword = tmpdir + "/es_wordlist.json" +uw_LDC = ldc_lexicon + "/callhome_spanish_lexicon_970908/preferences" + +filtered_letters = re.compile(u'[¡¥ª°º¿àçèëìîôö0123456789]') +merged_lexicon = [] +# All three lexicons are in different formats +# First add the data from lexicon_fisher (A) into the dictionary +fisher = codecs.open(uw_fisher, encoding='utf-8') +for line in fisher: + merged_lexicon.append(line.strip()) +fisher.close() + +print("After adding the fisher data, the lexicon contains {} entries".format(len(merged_lexicon))) + +# Now add data from the LDC lexicon +ldc = codecs.open(uw_LDC, encoding='iso-8859-1') +for line in ldc: + entries = line.strip().split('\t') + if entries[0].lower() not in merged_lexicon: + merged_lexicon.append(entries[0].lower()) + +print("After adding the LDC data, the lexicon contains {} entries".format(len(merged_lexicon))) + +# Finally add the gigaword data +gigaword = json.load(open(uw_gigaword)) +gigaword = reversed(sorted(gigaword.items(), key=operator.itemgetter(1))) + +for item in gigaword: + # We need a maximum of wordlimit words in the lexicon + if len(merged_lexicon) == wordlimit: + break + + if item[0].lower() not in merged_lexicon: + merged_lexicon.append(item[0].lower()) + +print("After adding the Gigaword data, the lexicon contains {} entries".format(len(merged_lexicon))) + +# Now write the uniquewords to a file +lf = codecs.open(tmpdir + '/uniquewords64k', encoding='utf-8', mode='w+') +ltuples = sorted(merged_lexicon) + +for item in ltuples: + if not item==u'ñ' and not re.search(filtered_letters, item): + lf.write(item + "\n") + +lf.close() + +print("Finshed writing unique words") diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/monitor_denlats.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/monitor_denlats.sh new file mode 100755 index 00000000000..a95893f698a --- /dev/null +++ 
b/egs/fisher_callhome_spanish/s5_gigaword/local/monitor_denlats.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# Copyright 2014 Gaurav Kumar. Apache 2.0 + +currentJob=0 + +dir=/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-matt/exp/sgmm2x_6a_denlats + +for f in $dir/.done.*; do + d=`echo ${f##*/} | awk 'BEGIN {FS="."} {print $3}'` + if [ $d -gt $currentJob ]; then + currentJob=$d + fi +done + +currentJob=$((currentJob+1)) + +echo Currently processing job : $currentJob + +for i in $(seq 210); do + job[$i]=$i +done + +dir=/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-matt/exp/sgmm2x_6a_denlats/log/$currentJob/q + +for f in $dir/done.*; do + d=`echo ${f##*/} | awk 'BEGIN {FS="."} {print $3}'` + unset job[$d] +done + +echo sub-splits left : ${#job[@]} +echo ${job[@]} diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/nnet3/run_ivector_common.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/nnet3/run_ivector_common.sh new file mode 100755 index 00000000000..cc9de4d26c5 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/nnet3/run_ivector_common.sh @@ -0,0 +1,187 @@ +#!/bin/bash + +set -e -o pipefail + +# This script is called from scripts like local/nnet3/run_tdnn.sh and +# local/chain/run_tdnn.sh (and may eventually be called by more scripts). It +# contains the common feature preparation and iVector-related parts of the +# script. See those scripts for examples of usage. + + +stage=7 +nj=30 +train_set=train # you might set this to e.g. train. +test_sets="test dev" +gmm=tri5a # This specifies a GMM-dir from the features of the type you're training the system on; + # it should contain alignments for 'train_set'. + +num_threads_ubm=32 +nnet3_affix= # affix for exp/nnet3 directory to put iVector stuff in (e.g. + # in the tedlium recip it's _cleaned). + +. ./cmd.sh +. ./path.sh +. utils/parse_options.sh + + +gmm_dir=exp/${gmm} +ali_dir=exp/${gmm}_ali_${train_set}_sp + +for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do + if [ ! -f $f ]; then + echo "$0: expected file $f to exist" + exit 1 + fi +done + + + +if [ $stage -le 7 ] && [ -f data/${train_set}_sp_hires/feats.scp ]; then + echo "$0: data/${train_set}_sp_hires/feats.scp already exists." + echo " ... Please either remove it, or rerun this script with stage > 7." + exit 1 +fi + + +if [ $stage -le 8 ]; then + echo "$0: preparing directory for speed-perturbed data" + utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp +fi + +if [ $stage -le 9 ]; then + echo "$0: creating high-resolution MFCC features" + + # this shows how you can split across multiple file-systems. we'll split the + # MFCC dir across multiple locations. You might want to be careful here, if you + # have multiple copies of Kaldi checked out and run the same recipe, not to let + # them overwrite each other. + mfccdir=data/${train_set}_sp_hires/data + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/mfcc/wsj-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + for datadir in ${train_set}_sp ${test_sets}; do + utils/copy_data_dir.sh data/$datadir data/${datadir}_hires + done + + # do volume-perturbation on the training data prior to extracting hires + # features; this helps make trained nnets more invariant to test data volume. 
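+  # (Roughly speaking, that script rewrites each wav.scp entry so the audio is
+  #  piped through a random gain, e.g. something like
+  #    sox -t wav - -t wav - vol 1.25
+  #  with one factor drawn per recording; see
+  #  utils/data/perturb_data_dir_volume.sh itself for the exact range it uses.)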
+ utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires + + for datadir in ${train_set}_sp ${test_sets}; do + steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires + steps/compute_cmvn_stats.sh data/${datadir}_hires + utils/fix_data_dir.sh data/${datadir}_hires + done +fi + +if [ $stage -le 10 ]; then + echo "$0: computing a subset of data to train the diagonal UBM." + + mkdir -p exp/nnet3${nnet3_affix}/diag_ubm + temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm + + # train a diagonal UBM using a subset of about a quarter of the data + num_utts_total=$(wc -l in the history of a n-gram +# un-comment the following line +#limit_unk_history_opt="--limit-unk-history=true" + +for order in 3; do + # decide on the vocabulary. + # Note: you'd use --wordlist if you had a previously determined word-list + # that you wanted to use. + lm_name="${num_word}_${order}" + min_counts='' + # Note: the following might be a more reasonable setting: + # min_counts='fisher=2 swbd1=1' + if [ -n "${min_counts}" ]; then + lm_name+="_`echo ${min_counts} | tr -s "[:blank:]" "_" | tr "=" "-"`" + fi + unpruned_lm_dir=${lm_dir}/${lm_name}.pocolm + train_lm.py --num-words=${num_word} --num-splits=5 --warm-start-ratio=10 ${max_memory} \ + --min-counts=${min_counts} \ + --keep-int-data=true ${fold_dev_opt} ${bypass_metaparam_optim_opt} \ + ${limit_unk_history_opt} ${textdir} ${order} ${lm_dir}/work ${unpruned_lm_dir} + + mkdir -p ${arpa_dir} + format_arpa_lm.py ${max_memory} ${unpruned_lm_dir} | gzip -c > ${arpa_dir}/${lm_name}_${order}gram_unpruned.arpa.gz + + # example of pruning. note: the threshold can be less than or more than one. + get_data_prob.py ${max_memory} ${textdir}/dev.txt ${unpruned_lm_dir} 2>&1 | grep -F '[perplexity' + for threshold in 1.0 2.0 4.0; do + pruned_lm_dir=${lm_dir}/${lm_name}_prune${threshold}.pocolm + prune_lm_dir.py --final-threshold=${threshold} ${max_memory} ${unpruned_lm_dir} ${pruned_lm_dir} 2>&1 | tail -n 5 | head -n 3 + get_data_prob.py ${max_memory} ${textdir}/dev.txt ${pruned_lm_dir} 2>&1 | grep -F '[perplexity' + + format_arpa_lm.py ${max_memory} ${pruned_lm_dir} | gzip -c > ${arpa_dir}/${lm_name}_${order}gram_prune${threshold}.arpa.gz + + done + + # example of pruning by size. + size=1000000 + pruned_lm_dir=${lm_dir}/${lm_name}_prune${size}.pocolm + prune_lm_dir.py --target-num-ngrams=${size} ${max_memory} ${unpruned_lm_dir} ${pruned_lm_dir} 2>&1 | tail -n 8 | head -n 6 | grep -v 'log-prob changes' + get_data_prob.py ${textdir}/dev.txt ${max_memory} ${pruned_lm_dir} 2>&1 | grep -F '[perplexity' + + format_arpa_lm.py ${max_memory} ${pruned_lm_dir} | gzip -c > ${arpa_dir}/${lm_name}_${order}gram_prune${size}.arpa.gz + +done + +# (run local/srilm_baseline.sh ${num_word} to see the following result e.g. local/srilm_baseline.sh 40000 ) + +# the following does does some self-testing, including +# that the computed derivatives are accurate. 
+# local/self_test.sh + +# perplexities from pocolm-estimated language models with pocolm's interpolation +# method from orders 3, 4, and 5 are: +# order 3: optimize_metaparameters.py: final perplexity without barrier function was -4.358818 (perplexity: 78.164689) +# order 4: optimize_metaparameters.py: final perplexity without barrier function was -4.309507 (perplexity: 74.403797) +# order 5: optimize_metaparameters.py: final perplexity without barrier function was -4.301741 (perplexity: 73.828181) + +# note, the perplexities from pocolm-estimated language models with SRILM's +# interpolation from orders 3 and 4 are (from local/pocolm_with_srilm_combination.sh), +# 78.8449 and 75.2202 respectively. + +# note, the perplexities from SRILM-estimated language models with SRILM's +# interpolation tool from orders 3 and 4 are (from local/srilm_baseline.sh), +# 78.9056 and 75.5528 respectively. diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/process_oracle.py b/egs/fisher_callhome_spanish/s5_gigaword/local/process_oracle.py new file mode 100755 index 00000000000..5c68e1204b2 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/process_oracle.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python +# Copyright 2014 Gaurav Kumar. Apache 2.0 + +# Processes lattice oracles + +import os +import sys + +oracleDir = "exp/tri5a/decode_callhome_train/oracle" +wordsFile = open('exp/sgmm2x_6a/graph/words.txt') +conversationList = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-callhome/train') +oracleTmp = 'exp/tri5a/one-best/oracle-ch-train' +provFile = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-callhome/oracle.train', 'w+') +timLocation = '/export/a04/gkumar/corpora/fishcall/callhome/tim' + +def findTranscription(timeDetail): + file1 = open(oracleDir + "/oracle.tra") + for line in file1: + lineComp = line.split() + if lineComp[0] == timeDetail: + return " ".join(lineComp[1:]) + # No result found + return -1 + +words = {} + +# Extract word list +for line in wordsFile: + lineComp = line.split() + words[int(lineComp[1])] = lineComp[0].strip() + +# Now read list of files in conversations +fileList = [] +for line in conversationList: + line = line.strip() + line = line[:-4] + fileList.append(line) + +# IN what order were the conversations added to the spanish files? 
+# TODO: Make sure they match the order in which these english files are being written + +# Now get timing information to concatenate the ASR outputs +if not os.path.exists(oracleTmp): + os.makedirs(oracleTmp) + +#provFile = open('/export/a04/gkumar/corpora/fishcall/fisher_provisional_dev.es', 'w+') +for item in fileList: + timingFile = open(timLocation + '/' + item + '.es') + newFile = open(oracleTmp + '/' + item + '.es', 'w+') + for line in timingFile: + timeInfo = line.split() + mergedTranslation = "" + for timeDetail in timeInfo: + #Locate this in ASR dev/test, this is going to be very slow + tmp = findTranscription(timeDetail) + if tmp != -1: + mergedTranslation = mergedTranslation + " " + tmp + mergedTranslation = mergedTranslation.strip() + transWords = [words[int(x)] for x in mergedTranslation.split()] + newFile.write(" ".join(transWords) + "\n") + provFile.write(" ".join(transWords) + "\n") + + newFile.close() +provFile.close() diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/rescore.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/rescore.sh new file mode 100755 index 00000000000..1b54b304e50 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/rescore.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +# Copyright 2014 Gaurav Kumar. Apache 2.0 + +. ./cmd.sh + +for iter in 1 2 3 4; do + steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \ + --transform-dir exp/tri5a/decode_test data/lang data/test exp/sgmm2x_6a/decode_test_fmllr \ + exp/sgmm2x_6a_mmi_b0.2/decode_test_fmllr_it$iter & +done + + +for iter in 1 2 3 4; do + steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \ + --transform-dir exp/tri5a/decode_dev data/lang data/dev exp/sgmm2x_6a/decode_dev_fmllr \ + exp/sgmm2x_6a_mmi_b0.2/decode_dev_fmllr_it$iter & +done + + +for iter in 1 2 3 4; do + steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \ + --transform-dir exp/tri5a/decode_dev2 data/lang data/dev2 exp/sgmm2x_6a/decode_dev2_fmllr \ + exp/sgmm2x_6a_mmi_b0.2/decode_dev2_fmllr_it$iter & +done diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/rnnlm.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/rnnlm.sh new file mode 100755 index 00000000000..aa06fdbb293 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/rnnlm.sh @@ -0,0 +1,84 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (author: Daniel Povey) +# 2015 Guoguo Chen +# 2017 Hainan Xu +# 2017 Xiaohui Zhang + +# This script trains LMs on the swbd LM-training data. + +# rnnlm/train_rnnlm.sh: best iteration (out of 35) was 34, linking it to final iteration. +# rnnlm/train_rnnlm.sh: train/dev perplexity was 41.9 / 50.0. +# Train objf: -5.07 -4.43 -4.25 -4.17 -4.12 -4.07 -4.04 -4.01 -3.99 -3.98 -3.96 -3.94 -3.92 -3.90 -3.88 -3.87 -3.86 -3.85 -3.84 -3.83 -3.82 -3.81 -3.80 -3.79 -3.78 -3.78 -3.77 -3.77 -3.76 -3.75 -3.74 -3.73 -3.73 -3.72 -3.71 +# Dev objf: -10.32 -4.68 -4.43 -4.31 -4.24 -4.19 -4.15 -4.13 -4.10 -4.09 -4.05 -4.03 -4.02 -4.00 -3.99 -3.98 -3.98 -3.97 -3.96 -3.96 -3.95 -3.94 -3.94 -3.94 -3.93 -3.93 -3.93 -3.92 -3.92 -3.92 -3.92 -3.91 -3.91 -3.91 -3.91 + + +dir=Spanish_gigawrd/rnnlm +pocolm_dir=Spanish_gigawrd/work_pocolm/lm/110000_3.pocolm_pruned +wordslist= +embedding_dim=1024 +lstm_rpd=256 +lstm_nrpd=256 +stage=0 +train_stage=-30 +text=Spanish_gigawrd/text_lm +text_dir=Spanish_gigawrd/text_lm + +. ./cmd.sh +. ./utils/parse_options.sh + +mkdir -p $dir/config +set -e + +for f in $text/dev.txt; do + [ ! 
-f $f ] && \ + echo "$0: expected file $f to exist;" && exit 1 +done + +if [ $stage -le 0 ]; then + if [ -f $text_dir/unigram_weights ] ; then + mv $text_dir/unigram_weights $pocolm_dir/ + fi + cp $wordslist $dir/config/words.txt + n=`cat $dir/config/words.txt | wc -l` + echo " $n" >> $dir/config/words.txt + + # words that are not present in words.txt but are in the training or dev data, will be + # mapped to during training. + echo "" >$dir/config/oov.txt + local/get_data_weights.pl $pocolm_dir $dir/config/data_weights.txt + rnnlm/get_unigram_probs.py --vocab-file=$dir/config/words.txt \ + --unk-word="" \ + --data-weights-file=$dir/config/data_weights.txt \ + $text_dir | awk 'NF==2' >$dir/config/unigram_probs.txt + + # choose features + rnnlm/choose_features.py --unigram-probs=$dir/config/unigram_probs.txt \ + --use-constant-feature=true \ + --special-words=',,,,[noise],[laughter]' \ + $dir/config/words.txt > $dir/config/features.txt +fi + +if [ $stage -le 1 ]; then + cat <$dir/config/xconfig + input dim=$embedding_dim name=input + relu-renorm-layer name=tdnn1 dim=$embedding_dim input=Append(0, IfDefined(-1)) + fast-lstmp-layer name=lstm1 cell-dim=$embedding_dim recurrent-projection-dim=$lstm_rpd non-recurrent-projection-dim=$lstm_nrpd + relu-renorm-layer name=tdnn2 dim=$embedding_dim input=Append(0, IfDefined(-3)) + fast-lstmp-layer name=lstm2 cell-dim=$embedding_dim recurrent-projection-dim=$lstm_rpd non-recurrent-projection-dim=$lstm_nrpd + relu-renorm-layer name=tdnn3 dim=$embedding_dim input=Append(0, IfDefined(-3)) + output-layer name=output include-log-softmax=false dim=$embedding_dim +EOF + rnnlm/validate_config_dir.sh $text_dir $dir/config +fi + +if [ $stage -le 2 ]; then + rnnlm/prepare_rnnlm_dir.sh $text_dir $dir/config $dir +fi + +if [ $stage -le 3 ]; then + rnnlm/train_rnnlm.sh --num-jobs-initial 1 --num-jobs-final 2 \ + --stage $train_stage --num-epochs 5 --cmd "$train_cmd" $dir +fi + +exit 0 diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/run_norm.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/run_norm.sh new file mode 100755 index 00000000000..4a26f6857b8 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/run_norm.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +set -euo pipefail + +punctuation_symbols=( "," "\"" "\`" "\:" "(" ")" "-" ";" "?" "!" "/" "_" "{" "}" "*" ) + +config=$1 +path_prefix=$2 +data=$3 +job=$4 +dir=$5 + +substitute_arg="" +num_syms=0 + +for i in "${punctuation_symbols[@]}"; do + symbol=${punctuation_symbols[${num_syms}]} + if [ $num_syms -eq 0 ]; then + substitute_arg="sed 's:${i}: :g'" + else + substitute_arg=$substitute_arg" | sed 's:${i}: :g'" + fi + substitute_arg=$substitute_arg" |sed 's:${i}$: :g' | sed 's:^${i}: :g'" + num_syms=$((num_syms+1)) +done +mkdir -p $dir/normalize/$job +echo "cat $data/$job | $substitute_arg" > $dir/normalize/$job/substitute.sh +bash $dir/normalize/$job/substitute.sh | \ + sed "s: 's:'s:g" | sed "s: 'm:'m:g" | \ + sed "s: \s*: :g" | tr 'A-ZÂÁÀÄÊÉÈËÏÍÎÖÓÔÖÚÙÛÑÇ' 'a-zâáàäêéèëïíîöóôöúùûñç' > $dir/normalize/$job/text +normalizer_main --config=$config --path_prefix=$path_prefix <$dir/normalize/$job/text >$dir/$job.txt + +exit 0; diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/run_sgmm2x.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/run_sgmm2x.sh new file mode 100755 index 00000000000..9148b1f1171 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/run_sgmm2x.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# Copyright 2014 Gaurav Kumar. 
Apache 2.0 + +# This is as run_sgmm2.sh but excluding the "speaker-dependent weights", +# so not doing the symmetric SGMM. + +. ./cmd.sh + +## SGMM on top of LDA+MLLT+SAT features. +if [ ! -f exp/ubm6a/final.mdl ]; then + steps/train_ubm.sh --silence-weight 0.5 --cmd "$train_cmd" 800 data/train data/lang exp/tri5a_ali exp/ubm6a || exit 1; +fi +# Double the number of SAT states : sanjeev +steps/train_sgmm2.sh --spk-dep-weights false --cmd "$train_cmd" 10000 120000 \ + data/train data/lang exp/tri5a_ali exp/ubm6a/final.ubm exp/sgmm2x_6a || exit 1; + +utils/mkgraph.sh data/lang_test exp/sgmm2x_6a exp/sgmm2x_6a/graph || exit 1; + +steps/decode_sgmm2.sh --config conf/decode.config --nj 25 --cmd "$decode_cmd" \ + --transform-dir exp/tri5a/decode_dev exp/sgmm2x_6a/graph data/dev exp/sgmm2x_6a/decode_dev || exit 1; + +steps/decode_sgmm2.sh --use-fmllr true --config conf/decode.config --nj 25 --cmd "$decode_cmd" \ + --transform-dir exp/tri5a/decode_dev exp/sgmm2x_6a/graph data/dev exp/sgmm2x_6a/decode_dev_fmllr || exit 1; + +steps/decode_sgmm2.sh --config conf/decode.config --nj 25 --cmd "$decode_cmd" \ + --transform-dir exp/tri5a/decode_test exp/sgmm2x_6a/graph data/test exp/sgmm2x_6a/decode_test || exit 1; + +steps/decode_sgmm2.sh --use-fmllr true --config conf/decode.config --nj 25 --cmd "$decode_cmd" \ + --transform-dir exp/tri5a/decode_test exp/sgmm2x_6a/graph data/test exp/sgmm2x_6a/decode_test_fmllr || exit 1; + +steps/decode_sgmm2.sh --config conf/decode.config --nj 25 --cmd "$decode_cmd" \ + --transform-dir exp/tri5a/decode_dev2 exp/sgmm2x_6a/graph data/dev2 exp/sgmm2x_6a/decode_dev2 || exit 1; + +steps/decode_sgmm2.sh --use-fmllr true --config conf/decode.config --nj 25 --cmd "$decode_cmd" \ + --transform-dir exp/tri5a/decode_dev2 exp/sgmm2x_6a/graph data/dev2 exp/sgmm2x_6a/decode_dev2_fmllr || exit 1; + + # Now we'll align the SGMM system to prepare for discriminative training. + steps/align_sgmm2.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri5a \ + --use-graphs true --use-gselect true data/train data/lang exp/sgmm2x_6a exp/sgmm2x_6a_ali || exit 1; + steps/make_denlats_sgmm2.sh --nj 30 --sub-split 210 --cmd "$decode_cmd" --transform-dir exp/tri5a \ + data/train data/lang exp/sgmm2x_6a_ali exp/sgmm2x_6a_denlats + steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" --transform-dir exp/tri5a --boost 0.2 \ + data/train data/lang exp/sgmm2x_6a_ali exp/sgmm2x_6a_denlats exp/sgmm2x_6a_mmi_b0.2 + + for iter in 1 2 3 4; do + steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \ + --transform-dir exp/tri5a/decode_test data/lang data/test exp/sgmm2x_6a/decode_test exp/sgmm2x_6a_mmi_b0.2/decode_test_it$iter & + done + +wait +steps/decode_combine.sh data/test data/lang exp/tri1/decode exp/tri2a/decode exp/combine_1_2a/decode || exit 1; +steps/decode_combine.sh data/test data/lang exp/sgmm2x_4a/decode exp/tri3b_mmi/decode exp/combine_sgmm2x_4a_3b/decode || exit 1; +# combining the sgmm run and the best MMI+fMMI run. 
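+# (Note: these decode_combine.sh calls refer to tri1/tri2a/tri3b/sgmm2x_4a
+#  directories that this recipe does not appear to build (it trains tri5a and
+#  sgmm2x_6a), so they presumably date from an older setup and would need
+#  their paths updated before they could run.)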
+steps/decode_combine.sh data/test data/lang exp/sgmm2x_4a/decode exp/tri3b_fmmi_c/decode_it5 exp/combine_sgmm2x_4a_3b_fmmic5/decode || exit 1; + +steps/decode_combine.sh data/test data/lang exp/sgmm2x_4a_mmi_b0.2/decode_it4 exp/tri3b_fmmi_c/decode_it5 exp/combine_sgmm2x_4a_mmi_3b_fmmic5/decode || exit 1; + diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/score.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/score.sh new file mode 120000 index 00000000000..0afefc3158c --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/score.sh @@ -0,0 +1 @@ +../steps/score_kaldi.sh \ No newline at end of file diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/score_oracle.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/score_oracle.sh new file mode 100755 index 00000000000..21b793a4d27 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/score_oracle.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# Copyright 2014 Gaurav Kumar. Apache 2.0 + +oracle_dir=exp/tri5a/decode_callhome_test/oracle +split=callhome_test +data_dir=data/callhome_test +lang_dir=data/lang + +# Make sure that your STM and CTM files are in UTF-8 encoding +# Any other encoding will cause this script to fail/misbehave + +if [ ! -e $oracle_dir -o ! -e $data_dir -o ! -e $lang_dir ]; then + echo "Missing pre-requisites" + exit 1 +fi + +for i in {5..20}; do + mkdir -p $oracle_dir/score_$i + cp $oracle_dir/$split.ctm $oracle_dir/score_$i/ +done + +. /export/babel/data/software/env.sh + +# Start scoring +/export/a11/guoguo/babel/103-bengali-limitedLP.official/local/score_stm.sh $data_dir $lang_dir \ + $oracle_dir + +# Print a summary of the result +grep "Percent Total Error" $oracle_dir/score_*/$split.ctm.dtl diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/dev b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/dev new file mode 100644 index 00000000000..77e3b01786f --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/dev @@ -0,0 +1,20 @@ +sp_0897.sph +sp_0968.sph +sp_0981.sph +sp_1062.sph +sp_1292.sph +sp_1411.sph +sp_1413.sph +sp_1552.sph +sp_1554.sph +sp_1805.sph +sp_1808.sph +sp_1882.sph +sp_1930.sph +sp_1947.sph +sp_2037.sph +sp_2054.sph +sp_2057.sph +sp_2107.sph +sp_2109.sph +sp_2144.sph diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/dev b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/dev new file mode 100644 index 00000000000..77e3b01786f --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/dev @@ -0,0 +1,20 @@ +sp_0897.sph +sp_0968.sph +sp_0981.sph +sp_1062.sph +sp_1292.sph +sp_1411.sph +sp_1413.sph +sp_1552.sph +sp_1554.sph +sp_1805.sph +sp_1808.sph +sp_1882.sph +sp_1930.sph +sp_1947.sph +sp_2037.sph +sp_2054.sph +sp_2057.sph +sp_2107.sph +sp_2109.sph +sp_2144.sph diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/test b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/test new file mode 100644 index 00000000000..0cbc3cc95fd --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/test @@ -0,0 +1,20 @@ +sp_0053.sph +sp_0082.sph +sp_0084.sph +sp_0088.sph +sp_0681.sph +sp_0699.sph +sp_0776.sph +sp_0857.sph +sp_1031.sph +sp_1100.sph +sp_1148.sph +sp_1156.sph +sp_1186.sph +sp_1212.sph +sp_1345.sph +sp_1435.sph +sp_1578.sph +sp_1648.sph +sp_1807.sph +sp_1847.sph diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/train 
b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/train new file mode 100644 index 00000000000..2c936072534 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/train @@ -0,0 +1,80 @@ +sp_0085.sph +sp_0096.sph +sp_0098.sph +sp_0100.sph +sp_0291.sph +sp_0713.sph +sp_0724.sph +sp_0726.sph +sp_0731.sph +sp_0733.sph +sp_0753.sph +sp_0788.sph +sp_0826.sph +sp_0831.sph +sp_0836.sph +sp_0841.sph +sp_0850.sph +sp_0855.sph +sp_0892.sph +sp_0899.sph +sp_0910.sph +sp_0917.sph +sp_0919.sph +sp_0923.sph +sp_0945.sph +sp_0950.sph +sp_0951.sph +sp_0992.sph +sp_0997.sph +sp_1013.sph +sp_1039.sph +sp_1044.sph +sp_1045.sph +sp_1058.sph +sp_1060.sph +sp_1063.sph +sp_1081.sph +sp_1106.sph +sp_1122.sph +sp_1140.sph +sp_1175.sph +sp_1195.sph +sp_1198.sph +sp_1231.sph +sp_1234.sph +sp_1255.sph +sp_1260.sph +sp_1261.sph +sp_1262.sph +sp_1264.sph +sp_1266.sph +sp_1273.sph +sp_1275.sph +sp_1284.sph +sp_1286.sph +sp_1304.sph +sp_1308.sph +sp_1333.sph +sp_1341.sph +sp_1353.sph +sp_1368.sph +sp_1379.sph +sp_1384.sph +sp_1449.sph +sp_1463.sph +sp_1574.sph +sp_1740.sph +sp_1759.sph +sp_1849.sph +sp_1908.sph +sp_1915.sph +sp_1918.sph +sp_1974.sph +sp_1976.sph +sp_1988.sph +sp_2000.sph +sp_2056.sph +sp_2070.sph +sp_2091.sph +sp_2101.sph diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/dev b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/dev new file mode 100644 index 00000000000..d3769f0ffb5 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/dev @@ -0,0 +1,20 @@ +20051009_182032_217_fsp.sph +20051009_210519_219_fsp.sph +20051010_212418_225_fsp.sph +20051016_180547_265_fsp.sph +20051016_210626_267_fsp.sph +20051017_180712_270_fsp.sph +20051017_220530_275_fsp.sph +20051017_234550_276_fsp.sph +20051018_210220_279_fsp.sph +20051018_210744_280_fsp.sph +20051019_190221_288_fsp.sph +20051019_210146_289_fsp.sph +20051019_230329_292_fsp.sph +20051022_180817_311_fsp.sph +20051023_232057_325_fsp.sph +20051024_180453_327_fsp.sph +20051024_181110_329_fsp.sph +20051025_212334_337_fsp.sph +20051026_180724_341_fsp.sph +20051026_211309_346_fsp.sph diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/dev2 b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/dev2 new file mode 100644 index 00000000000..f1b5c293d67 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/dev2 @@ -0,0 +1,20 @@ +20050909_210655_26_fsp.sph +20050910_210708_33_fsp.sph +20050913_210933_49_fsp.sph +20050913_211649_50_fsp.sph +20050915_210434_65_fsp.sph +20050916_180332_68_fsp.sph +20050918_180733_81_fsp.sph +20050918_210841_82_fsp.sph +20050920_212030_93_fsp.sph +20050921_210443_99_fsp.sph +20050923_211304_115_fsp.sph +20050925_180713_120_fsp.sph +20050925_180825_121_fsp.sph +20050926_180516_125_fsp.sph +20050926_180555_126_fsp.sph +20050928_000254_141_fsp.sph +20050930_210540_161_fsp.sph +20051002_180726_170_fsp.sph +20051007_181850_205_fsp.sph +20051007_191217_206_fsp.sph diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/test b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/test new file mode 100644 index 00000000000..6190ced077c --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/test @@ -0,0 +1,20 @@ +20051028_180633_356_fsp.sph +20051029_211606_365_fsp.sph +20051030_193924_371_fsp.sph +20051101_212731_386_fsp.sph +20051102_134901_389_fsp.sph +20051102_180402_391_fsp.sph 
+20051102_181501_393_fsp.sph +20051103_211105_404_fsp.sph +20051103_233456_406_fsp.sph +20051107_184634_438_fsp.sph +20051109_180253_445_fsp.sph +20051109_210353_450_fsp.sph +20051111_181045_470_fsp.sph +20051111_182216_472_fsp.sph +20051112_181649_485_fsp.sph +20051113_155059_492_fsp.sph +20051113_210221_496_fsp.sph +20051113_214925_498_fsp.sph +20051114_181749_505_fsp.sph +20051115_212123_516_fsp.sph diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/train b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/train new file mode 100644 index 00000000000..b57683842b2 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/train @@ -0,0 +1,759 @@ +20050908_182943_22_fsp.sph +20050908_191808_23_fsp.sph +20050909_210428_25_fsp.sph +20050909_221657_28_fsp.sph +20050910_180310_29_fsp.sph +20050910_180330_30_fsp.sph +20050910_181354_31_fsp.sph +20050910_190223_32_fsp.sph +20050911_180647_34_fsp.sph +20050911_200216_35_fsp.sph +20050911_210429_36_fsp.sph +20050911_210530_37_fsp.sph +20050911_210904_38_fsp.sph +20050912_181441_40_fsp.sph +20050912_181538_41_fsp.sph +20050912_182044_42_fsp.sph +20050912_212913_43_fsp.sph +20050913_180324_44_fsp.sph +20050913_180731_46_fsp.sph +20050913_180947_47_fsp.sph +20050913_210409_48_fsp.sph +20050914_000831_51_fsp.sph +20050914_180332_52_fsp.sph +20050914_180606_53_fsp.sph +20050914_181020_54_fsp.sph +20050914_210243_55_fsp.sph +20050914_210822_56_fsp.sph +20050914_220753_58_fsp.sph +20050915_180728_60_fsp.sph +20050915_180740_61_fsp.sph +20050915_192457_62_fsp.sph +20050915_194045_63_fsp.sph +20050915_210200_64_fsp.sph +20050915_210916_66_fsp.sph +20050915_212325_67_fsp.sph +20050916_180740_69_fsp.sph +20050916_200334_70_fsp.sph +20050916_210235_71_fsp.sph +20050916_210510_72_fsp.sph +20050916_223656_73_fsp.sph +20050917_210406_74_fsp.sph +20050917_210805_75_fsp.sph +20050917_211045_76_fsp.sph +20050917_212041_77_fsp.sph +20050918_180326_80_fsp.sph +20050919_000612_83_fsp.sph +20050919_180511_84_fsp.sph +20050919_180703_85_fsp.sph +20050919_180925_86_fsp.sph +20050919_190254_87_fsp.sph +20050920_180330_88_fsp.sph +20050920_180342_89_fsp.sph +20050920_180607_90_fsp.sph +20050920_181919_91_fsp.sph +20050920_211414_92_fsp.sph +20050920_230520_94_fsp.sph +20050921_180639_95_fsp.sph +20050921_181002_96_fsp.sph +20050921_210340_98_fsp.sph +20050921_211329_101_fsp.sph +20050921_221625_102_fsp.sph +20050922_180618_103_fsp.sph +20050922_180948_104_fsp.sph +20050922_210740_106_fsp.sph +20050922_211003_107_fsp.sph +20050922_230412_108_fsp.sph +20050923_180514_110_fsp.sph +20050923_180530_111_fsp.sph +20050923_210442_114_fsp.sph +20050924_180747_117_fsp.sph +20050924_181124_118_fsp.sph +20050925_210645_122_fsp.sph +20050925_231407_123_fsp.sph +20050926_000425_124_fsp.sph +20050926_180719_127_fsp.sph +20050926_220244_130_fsp.sph +20050926_230706_131_fsp.sph +20050927_180422_132_fsp.sph +20050927_181033_133_fsp.sph +20050927_181232_134_fsp.sph +20050927_210320_135_fsp.sph +20050927_210848_136_fsp.sph +20050927_210947_138_fsp.sph +20050927_211929_139_fsp.sph +20050927_231016_140_fsp.sph +20050928_180631_142_fsp.sph +20050928_210256_144_fsp.sph +20050928_210700_145_fsp.sph +20050928_211113_146_fsp.sph +20050928_220320_147_fsp.sph +20050928_232236_148_fsp.sph +20050929_180318_149_fsp.sph +20050929_180722_150_fsp.sph +20050929_180932_151_fsp.sph +20050929_211337_153_fsp.sph +20050929_220820_154_fsp.sph +20050929_230406_155_fsp.sph +20050930_180329_156_fsp.sph +20050930_180411_157_fsp.sph 
+20050930_180646_158_fsp.sph +20050930_200308_159_fsp.sph +20051001_180328_163_fsp.sph +20051001_181004_164_fsp.sph +20051001_210749_166_fsp.sph +20051001_211346_167_fsp.sph +20051002_180339_169_fsp.sph +20051002_210324_171_fsp.sph +20051002_220651_174_fsp.sph +20051003_180434_175_fsp.sph +20051003_211042_178_fsp.sph +20051003_220633_179_fsp.sph +20051004_180351_180_fsp.sph +20051004_180542_181_fsp.sph +20051004_180730_182_fsp.sph +20051004_200737_183_fsp.sph +20051004_211611_185_fsp.sph +20051005_180420_187_fsp.sph +20051005_180709_188_fsp.sph +20051005_213606_191_fsp.sph +20051005_220917_192_fsp.sph +20051005_230659_193_fsp.sph +20051006_180416_194_fsp.sph +20051006_180653_195_fsp.sph +20051006_180815_196_fsp.sph +20051006_181525_197_fsp.sph +20051006_183153_199_fsp.sph +20051006_210246_200_fsp.sph +20051006_210417_201_fsp.sph +20051006_220329_203_fsp.sph +20051008_000036_208_fsp.sph +20051008_180249_209_fsp.sph +20051008_181720_210_fsp.sph +20051008_183224_211_fsp.sph +20051008_190256_212_fsp.sph +20051008_211712_214_fsp.sph +20051008_213416_215_fsp.sph +20051009_180444_216_fsp.sph +20051009_190753_218_fsp.sph +20051009_220443_221_fsp.sph +20051010_180650_222_fsp.sph +20051010_182706_223_fsp.sph +20051010_210622_224_fsp.sph +20051010_222853_227_fsp.sph +20051010_231630_228_fsp.sph +20051011_181919_230_fsp.sph +20051011_211026_232_fsp.sph +20051011_220348_233_fsp.sph +20051012_180233_234_fsp.sph +20051012_190241_236_fsp.sph +20051012_193952_237_fsp.sph +20051012_224157_239_fsp.sph +20051013_180458_240_fsp.sph +20051013_180613_241_fsp.sph +20051013_180700_242_fsp.sph +20051013_182213_244_fsp.sph +20051013_210221_245_fsp.sph +20051013_210425_246_fsp.sph +20051013_210941_247_fsp.sph +20051013_220243_248_fsp.sph +20051014_180259_249_fsp.sph +20051014_180940_250_fsp.sph +20051014_180948_251_fsp.sph +20051014_183707_252_fsp.sph +20051014_210348_253_fsp.sph +20051014_210647_254_fsp.sph +20051014_220227_256_fsp.sph +20051014_230339_257_fsp.sph +20051015_180549_258_fsp.sph +20051015_190247_259_fsp.sph +20051015_210138_260_fsp.sph +20051015_210701_261_fsp.sph +20051015_210831_262_fsp.sph +20051016_180926_266_fsp.sph +20051017_000346_269_fsp.sph +20051017_210137_273_fsp.sph +20051017_215732_274_fsp.sph +20051018_180559_277_fsp.sph +20051018_180816_278_fsp.sph +20051018_211701_282_fsp.sph +20051018_231046_283_fsp.sph +20051018_235317_284_fsp.sph +20051019_180448_285_fsp.sph +20051019_183344_287_fsp.sph +20051020_180339_293_fsp.sph +20051020_180759_295_fsp.sph +20051020_210218_297_fsp.sph +20051020_212525_299_fsp.sph +20051020_222944_300_fsp.sph +20051020_234953_301_fsp.sph +20051021_180218_302_fsp.sph +20051021_180508_303_fsp.sph +20051021_190605_304_fsp.sph +20051021_210159_305_fsp.sph +20051021_210530_306_fsp.sph +20051021_222225_307_fsp.sph +20051022_001311_309_fsp.sph +20051022_180452_310_fsp.sph +20051022_180829_312_fsp.sph +20051022_190406_313_fsp.sph +20051022_200517_314_fsp.sph +20051022_210920_315_fsp.sph +20051022_230324_316_fsp.sph +20051022_232428_317_fsp.sph +20051023_180342_318_fsp.sph +20051023_180530_319_fsp.sph +20051023_190301_321_fsp.sph +20051023_210258_322_fsp.sph +20051023_210605_323_fsp.sph +20051023_223751_324_fsp.sph +20051024_000348_326_fsp.sph +20051024_180624_328_fsp.sph +20051024_210748_330_fsp.sph +20051024_211346_331_fsp.sph +20051024_221753_332_fsp.sph +20051024_230857_333_fsp.sph +20051025_180351_334_fsp.sph +20051025_210532_335_fsp.sph +20051025_210959_336_fsp.sph +20051025_220419_338_fsp.sph +20051026_180611_340_fsp.sph +20051026_190359_343_fsp.sph 
+20051026_210334_344_fsp.sph +20051026_211202_345_fsp.sph +20051026_230956_347_fsp.sph +20051026_234001_348_fsp.sph +20051027_180217_349_fsp.sph +20051027_210159_351_fsp.sph +20051027_210333_352_fsp.sph +20051027_211525_353_fsp.sph +20051027_231329_354_fsp.sph +20051028_180329_355_fsp.sph +20051028_210350_358_fsp.sph +20051028_211904_359_fsp.sph +20051029_200218_363_fsp.sph +20051029_210442_364_fsp.sph +20051029_220538_366_fsp.sph +20051030_000333_367_fsp.sph +20051030_180521_368_fsp.sph +20051030_181001_369_fsp.sph +20051030_190231_370_fsp.sph +20051030_210903_372_fsp.sph +20051030_230444_373_fsp.sph +20051031_180213_374_fsp.sph +20051031_180906_375_fsp.sph +20051031_210229_377_fsp.sph +20051031_220447_379_fsp.sph +20051101_153940_380_fsp.sph +20051101_211314_384_fsp.sph +20051101_223911_387_fsp.sph +20051101_230216_388_fsp.sph +20051102_175957_390_fsp.sph +20051102_210243_394_fsp.sph +20051102_210828_395_fsp.sph +20051102_211130_396_fsp.sph +20051103_163507_398_fsp.sph +20051103_180920_400_fsp.sph +20051103_185102_401_fsp.sph +20051103_210539_403_fsp.sph +20051103_223906_405_fsp.sph +20051104_123901_407_fsp.sph +20051104_180145_408_fsp.sph +20051104_181437_409_fsp.sph +20051104_190247_410_fsp.sph +20051104_210307_411_fsp.sph +20051104_210814_412_fsp.sph +20051104_212121_413_fsp.sph +20051104_222117_414_fsp.sph +20051104_231424_416_fsp.sph +20051105_175657_418_fsp.sph +20051105_181203_419_fsp.sph +20051105_210724_421_fsp.sph +20051105_220745_422_fsp.sph +20051106_180232_424_fsp.sph +20051106_181321_425_fsp.sph +20051106_190219_426_fsp.sph +20051106_200213_427_fsp.sph +20051106_210215_428_fsp.sph +20051106_210310_429_fsp.sph +20051106_211252_430_fsp.sph +20051106_211804_431_fsp.sph +20051106_215339_432_fsp.sph +20051106_221653_433_fsp.sph +20051107_115855_434_fsp.sph +20051107_160351_435_fsp.sph +20051107_180332_436_fsp.sph +20051107_182401_437_fsp.sph +20051107_210309_439_fsp.sph +20051107_212723_440_fsp.sph +20051108_145902_441_fsp.sph +20051108_181424_442_fsp.sph +20051108_210224_443_fsp.sph +20051108_212018_444_fsp.sph +20051109_180413_446_fsp.sph +20051109_181432_447_fsp.sph +20051109_181906_448_fsp.sph +20051109_183631_449_fsp.sph +20051109_210436_451_fsp.sph +20051109_211151_452_fsp.sph +20051109_212148_453_fsp.sph +20051109_232505_454_fsp.sph +20051110_155523_455_fsp.sph +20051110_180208_456_fsp.sph +20051110_180838_457_fsp.sph +20051110_182221_459_fsp.sph +20051110_182318_460_fsp.sph +20051110_210200_461_fsp.sph +20051110_210233_462_fsp.sph +20051110_210454_463_fsp.sph +20051110_211110_464_fsp.sph +20051110_212818_466_fsp.sph +20051110_225245_467_fsp.sph +20051111_181441_471_fsp.sph +20051111_184451_474_fsp.sph +20051111_190326_475_fsp.sph +20051111_194004_477_fsp.sph +20051111_201357_478_fsp.sph +20051111_230329_480_fsp.sph +20051112_000305_482_fsp.sph +20051112_165916_483_fsp.sph +20051112_185651_487_fsp.sph +20051112_190443_488_fsp.sph +20051112_210205_489_fsp.sph +20051112_210631_490_fsp.sph +20051112_231502_491_fsp.sph +20051113_180809_493_fsp.sph +20051113_210908_497_fsp.sph +20051113_220433_499_fsp.sph +20051114_171942_502_fsp.sph +20051114_181118_504_fsp.sph +20051114_210412_506_fsp.sph +20051114_212032_507_fsp.sph +20051114_215057_508_fsp.sph +20051114_220412_509_fsp.sph +20051114_225557_510_fsp.sph +20051115_134012_511_fsp.sph +20051115_180301_512_fsp.sph +20051115_181412_513_fsp.sph +20051115_181731_514_fsp.sph +20051115_182149_515_fsp.sph +20051115_213551_517_fsp.sph +20051115_215935_518_fsp.sph +20051115_230749_520_fsp.sph +20051116_000221_521_fsp.sph 
+20051116_172353_522_fsp.sph +20051116_180237_524_fsp.sph +20051116_181228_525_fsp.sph +20051116_181816_526_fsp.sph +20051116_190450_527_fsp.sph +20051116_210146_528_fsp.sph +20051116_210553_529_fsp.sph +20051116_211222_530_fsp.sph +20051116_212312_531_fsp.sph +20051116_222454_532_fsp.sph +20051116_233038_533_fsp.sph +20051117_001013_534_fsp.sph +20051117_180234_535_fsp.sph +20051117_181844_537_fsp.sph +20051117_210156_538_fsp.sph +20051117_210403_539_fsp.sph +20051117_211540_540_fsp.sph +20051117_211833_541_fsp.sph +20051117_212855_542_fsp.sph +20051117_213407_543_fsp.sph +20051117_220412_544_fsp.sph +20051117_225943_545_fsp.sph +20051118_180619_547_fsp.sph +20051118_180739_548_fsp.sph +20051118_182114_549_fsp.sph +20051118_182652_550_fsp.sph +20051118_210212_551_fsp.sph +20051118_210455_552_fsp.sph +20051118_212058_553_fsp.sph +20051118_212829_554_fsp.sph +20051119_000355_555_fsp.sph +20051119_181105_556_fsp.sph +20051119_210802_557_fsp.sph +20051119_212315_559_fsp.sph +20051119_214926_560_fsp.sph +20051120_181008_561_fsp.sph +20051120_181339_562_fsp.sph +20051120_190412_563_fsp.sph +20051120_205645_565_fsp.sph +20051120_210347_566_fsp.sph +20051120_211526_567_fsp.sph +20051121_181138_569_fsp.sph +20051121_181357_570_fsp.sph +20051121_190155_571_fsp.sph +20051121_210922_573_fsp.sph +20051122_181114_574_fsp.sph +20051122_190326_576_fsp.sph +20051122_210253_577_fsp.sph +20051122_210703_578_fsp.sph +20051122_211805_579_fsp.sph +20051122_213037_580_fsp.sph +20051122_215430_581_fsp.sph +20051123_180926_582_fsp.sph +20051123_181644_583_fsp.sph +20051123_210214_584_fsp.sph +20051123_211514_585_fsp.sph +20051123_212412_586_fsp.sph +20051123_213259_587_fsp.sph +20051124_181720_588_fsp.sph +20051124_190336_589_fsp.sph +20051124_212221_591_fsp.sph +20051124_220457_592_fsp.sph +20051125_181632_593_fsp.sph +20051125_190327_594_fsp.sph +20051125_212150_595_fsp.sph +20051126_181804_597_fsp.sph +20051126_190347_598_fsp.sph +20051126_210222_599_fsp.sph +20051127_181335_601_fsp.sph +20051127_190405_602_fsp.sph +20051127_210516_603_fsp.sph +20051127_211200_604_fsp.sph +20051127_212516_605_fsp.sph +20051128_215149_608_fsp.sph +20051128_222007_609_fsp.sph +20051129_180204_610_fsp.sph +20051129_181241_612_fsp.sph +20051129_181547_613_fsp.sph +20051129_183449_614_fsp.sph +20051129_190152_615_fsp.sph +20051129_210218_616_fsp.sph +20051129_210342_617_fsp.sph +20051129_212711_618_fsp.sph +20051130_181543_619_fsp.sph +20051130_182626_620_fsp.sph +20051130_210202_622_fsp.sph +20051130_210910_623_fsp.sph +20051130_212724_626_fsp.sph +20051130_220121_627_fsp.sph +20051130_221538_628_fsp.sph +20051201_181034_630_fsp.sph +20051201_181303_631_fsp.sph +20051201_183429_632_fsp.sph +20051201_191426_633_fsp.sph +20051201_193415_634_fsp.sph +20051201_195005_635_fsp.sph +20051201_210713_636_fsp.sph +20051201_212329_637_fsp.sph +20051201_230640_638_fsp.sph +20051202_181119_639_fsp.sph +20051202_181659_640_fsp.sph +20051202_182058_641_fsp.sph +20051202_184713_642_fsp.sph +20051202_190154_643_fsp.sph +20051202_193515_644_fsp.sph +20051202_210252_645_fsp.sph +20051202_211824_646_fsp.sph +20051202_212105_647_fsp.sph +20051203_180701_649_fsp.sph +20051203_182100_650_fsp.sph +20051203_182132_651_fsp.sph +20051203_182418_652_fsp.sph +20051203_183501_653_fsp.sph +20051203_190503_654_fsp.sph +20051203_191125_655_fsp.sph +20051203_210216_656_fsp.sph +20051203_212114_658_fsp.sph +20051203_222533_661_fsp.sph +20051206_180753_662_fsp.sph +20051206_180911_663_fsp.sph +20051206_181649_664_fsp.sph +20051206_183057_665_fsp.sph 
+20051206_193937_667_fsp.sph +20051206_201757_668_fsp.sph +20051206_203158_669_fsp.sph +20051206_210127_670_fsp.sph +20051206_210744_671_fsp.sph +20051206_211522_672_fsp.sph +20051206_213252_673_fsp.sph +20051206_214122_674_fsp.sph +20051206_231328_675_fsp.sph +20051207_180507_676_fsp.sph +20051207_181020_677_fsp.sph +20051207_190155_678_fsp.sph +20051207_190426_679_fsp.sph +20051207_193103_681_fsp.sph +20051207_211858_683_fsp.sph +20051207_212300_684_fsp.sph +20051207_212831_685_fsp.sph +20051207_214411_686_fsp.sph +20051208_180208_687_fsp.sph +20051208_180810_688_fsp.sph +20051208_182430_689_fsp.sph +20051208_190333_690_fsp.sph +20051208_210609_691_fsp.sph +20051208_211702_692_fsp.sph +20051208_212444_694_fsp.sph +20051208_214100_696_fsp.sph +20051208_220606_697_fsp.sph +20051209_180824_699_fsp.sph +20051209_181542_700_fsp.sph +20051209_181642_701_fsp.sph +20051209_182541_702_fsp.sph +20051209_182858_703_fsp.sph +20051209_210136_704_fsp.sph +20051209_210452_705_fsp.sph +20051209_211542_706_fsp.sph +20051209_212515_707_fsp.sph +20051209_222427_709_fsp.sph +20051209_231702_710_fsp.sph +20051210_180659_711_fsp.sph +20051210_181201_712_fsp.sph +20051210_182013_713_fsp.sph +20051210_182603_714_fsp.sph +20051210_190201_715_fsp.sph +20051210_210535_717_fsp.sph +20051210_210735_718_fsp.sph +20051211_000414_719_fsp.sph +20051211_181346_720_fsp.sph +20051211_182045_721_fsp.sph +20051211_184252_723_fsp.sph +20051211_190523_724_fsp.sph +20051211_210240_725_fsp.sph +20051211_211415_726_fsp.sph +20051212_180251_727_fsp.sph +20051212_181817_728_fsp.sph +20051212_182453_729_fsp.sph +20051212_190335_730_fsp.sph +20051212_210527_731_fsp.sph +20051212_210738_732_fsp.sph +20051212_211419_733_fsp.sph +20051212_213447_734_fsp.sph +20051212_214512_735_fsp.sph +20051213_180254_736_fsp.sph +20051213_185913_737_fsp.sph +20051213_191741_738_fsp.sph +20051213_210120_739_fsp.sph +20051213_211552_741_fsp.sph +20051213_211953_742_fsp.sph +20051213_221424_743_fsp.sph +20051213_222016_744_fsp.sph +20051214_193942_746_fsp.sph +20051214_194606_747_fsp.sph +20051214_201000_748_fsp.sph +20051214_202717_749_fsp.sph +20051214_211653_750_fsp.sph +20051214_212318_751_fsp.sph +20051214_212718_752_fsp.sph +20051214_213225_753_fsp.sph +20051215_180855_754_fsp.sph +20051215_181731_755_fsp.sph +20051215_182213_756_fsp.sph +20051215_190143_757_fsp.sph +20051215_190419_758_fsp.sph +20051215_195526_759_fsp.sph +20051215_200925_760_fsp.sph +20051215_201639_761_fsp.sph +20051215_203848_762_fsp.sph +20051215_210410_764_fsp.sph +20051215_212456_766_fsp.sph +20051215_212701_767_fsp.sph +20051215_212749_768_fsp.sph +20051215_214814_769_fsp.sph +20051215_220537_770_fsp.sph +20051215_222306_771_fsp.sph +20051216_181042_773_fsp.sph +20051216_182340_774_fsp.sph +20051216_191101_775_fsp.sph +20051216_192823_776_fsp.sph +20051216_200153_777_fsp.sph +20051216_211423_778_fsp.sph +20051216_220626_779_fsp.sph +20051217_142547_780_fsp.sph +20051217_180231_781_fsp.sph +20051217_182026_783_fsp.sph +20051217_182330_784_fsp.sph +20051217_182530_785_fsp.sph +20051217_183115_786_fsp.sph +20051217_190226_787_fsp.sph +20051218_142845_790_fsp.sph +20051218_180353_791_fsp.sph +20051218_181751_792_fsp.sph +20051218_182127_793_fsp.sph +20051218_182750_794_fsp.sph +20051218_200401_799_fsp.sph +20051218_210249_800_fsp.sph +20051218_211820_801_fsp.sph +20051218_212444_802_fsp.sph +20051218_212813_803_fsp.sph +20051219_180225_804_fsp.sph +20051219_182110_806_fsp.sph +20051219_190625_808_fsp.sph +20051219_210655_812_fsp.sph +20051219_212218_813_fsp.sph 
+20051219_212716_814_fsp.sph +20051219_213203_815_fsp.sph +20051219_221213_816_fsp.sph +20051219_223123_817_fsp.sph +20051220_181731_820_fsp.sph +20051220_190121_821_fsp.sph +20051220_212400_826_fsp.sph +20051220_212718_828_fsp.sph +20051220_213420_829_fsp.sph +20051221_000417_830_fsp.sph +20051221_180958_831_fsp.sph +20051221_210452_840_fsp.sph +20051221_212325_841_fsp.sph +20051221_212911_842_fsp.sph +20051222_000436_843_fsp.sph +20051222_181242_845_fsp.sph +20051222_181506_846_fsp.sph +20051222_182617_847_fsp.sph +20051222_184209_849_fsp.sph +20051222_200553_850_fsp.sph +20051222_210309_852_fsp.sph +20051222_212425_855_fsp.sph +20051223_180346_856_fsp.sph +20051223_181050_857_fsp.sph +20051223_183105_860_fsp.sph +20051223_212547_863_fsp.sph +20051223_212853_864_fsp.sph +20051224_180302_865_fsp.sph +20051224_182949_867_fsp.sph +20051224_210150_870_fsp.sph +20051224_213010_871_fsp.sph +20051225_192042_872_fsp.sph +20051225_210556_873_fsp.sph +20051226_180908_874_fsp.sph +20051226_181659_875_fsp.sph +20051227_181058_885_fsp.sph +20051227_211308_887_fsp.sph +20051227_213029_888_fsp.sph +20051227_214843_889_fsp.sph +20051227_220309_890_fsp.sph +20051228_180249_891_fsp.sph +20051228_182051_892_fsp.sph +20051228_183955_893_fsp.sph +20051228_210524_896_fsp.sph +20051228_211808_897_fsp.sph +20051228_212304_899_fsp.sph +20051228_212734_900_fsp.sph +20051228_223227_901_fsp.sph +20051229_180231_902_fsp.sph +20051229_182614_906_fsp.sph +20051229_182631_907_fsp.sph +20051229_214024_909_fsp.sph +20051230_180457_910_fsp.sph +20051230_181721_912_fsp.sph +20051230_210412_913_fsp.sph +20051230_210559_914_fsp.sph +20051230_212557_915_fsp.sph +20051231_000808_916_fsp.sph +20060103_180314_917_fsp.sph +20060103_182107_918_fsp.sph +20060103_182257_919_fsp.sph +20060103_182549_920_fsp.sph +20060103_182654_921_fsp.sph +20060103_184037_922_fsp.sph +20060103_211504_925_fsp.sph +20060103_211732_926_fsp.sph +20060104_180509_928_fsp.sph +20060104_181040_929_fsp.sph +20060104_182115_930_fsp.sph +20060104_182644_931_fsp.sph +20060104_190448_933_fsp.sph +20060104_192707_934_fsp.sph +20060104_210223_935_fsp.sph +20060104_212844_936_fsp.sph +20060104_220148_937_fsp.sph +20060105_202127_943_fsp.sph +20060105_205957_944_fsp.sph +20060105_210951_945_fsp.sph +20060105_211743_946_fsp.sph +20060105_213129_947_fsp.sph +20060105_213243_948_fsp.sph +20060105_230711_949_fsp.sph +20060106_180202_950_fsp.sph +20060106_181040_951_fsp.sph +20060106_181726_952_fsp.sph +20060106_182909_953_fsp.sph +20060106_183056_954_fsp.sph +20060106_183550_955_fsp.sph +20060106_185224_956_fsp.sph +20060106_193129_957_fsp.sph +20060107_180634_960_fsp.sph +20060107_181553_961_fsp.sph +20060107_182715_962_fsp.sph +20060107_190206_963_fsp.sph +20060107_190415_964_fsp.sph +20060107_210435_966_fsp.sph +20060107_220739_967_fsp.sph +20060108_180630_968_fsp.sph +20060108_194731_971_fsp.sph +20060108_234917_976_fsp.sph +20060109_180448_977_fsp.sph +20060109_182557_979_fsp.sph +20060109_183636_980_fsp.sph +20060109_183727_981_fsp.sph +20060109_205815_982_fsp.sph +20060109_213409_986_fsp.sph +20060109_215138_987_fsp.sph +20060109_220315_988_fsp.sph +20060109_220535_989_fsp.sph +20060110_183405_995_fsp.sph +20060110_200611_998_fsp.sph +20060110_210730_1002_fsp.sph +20060110_213516_1004_fsp.sph +20060110_221920_1006_fsp.sph +20060110_230947_1007_fsp.sph +20060111_181650_1008_fsp.sph +20060111_182557_1009_fsp.sph +20060111_184916_1010_fsp.sph +20060111_192159_1012_fsp.sph +20060111_200345_1013_fsp.sph +20060111_210257_1014_fsp.sph +20060111_212145_1016_fsp.sph 
+20060111_213742_1017_fsp.sph +20060111_213936_1018_fsp.sph +20060111_230912_1020_fsp.sph +20060112_180639_1021_fsp.sph +20060112_182612_1022_fsp.sph +20060112_183346_1023_fsp.sph +20060112_183622_1024_fsp.sph +20060112_210747_1025_fsp.sph +20060112_211025_1026_fsp.sph +20060112_221010_1027_fsp.sph +20060112_221022_1028_fsp.sph +20060113_180159_1030_fsp.sph +20060113_183452_1033_fsp.sph +20060113_190403_1034_fsp.sph +20060113_213733_1036_fsp.sph +20060114_181137_1039_fsp.sph +20060114_181922_1040_fsp.sph +20060114_191056_1043_fsp.sph +20060114_213242_1044_fsp.sph +20060115_180421_1045_fsp.sph +20060115_183525_1047_fsp.sph +20060115_210217_1048_fsp.sph +20060115_212231_1051_fsp.sph +20060115_220504_1052_fsp.sph +20060115_232345_1053_fsp.sph +20060116_181908_1054_fsp.sph +20060116_182500_1055_fsp.sph +20060116_183201_1056_fsp.sph +20060116_184141_1057_fsp.sph +20060116_202324_1058_fsp.sph +20060116_204753_1059_fsp.sph +20060116_210217_1060_fsp.sph +20060116_211237_1061_fsp.sph +20060116_212845_1063_fsp.sph +20060116_220652_1064_fsp.sph +20060116_221118_1065_fsp.sph +20060117_181936_1068_fsp.sph +20060117_182604_1069_fsp.sph +20060117_185153_1071_fsp.sph +20060117_210138_1072_fsp.sph +20060117_210311_1073_fsp.sph +20060117_212546_1074_fsp.sph +20060118_180229_1076_fsp.sph +20060118_180647_1078_fsp.sph +20060118_182448_1079_fsp.sph +20060118_183010_1080_fsp.sph +20060118_190231_1082_fsp.sph +20060118_200148_1083_fsp.sph +20060118_205216_1084_fsp.sph +20060118_212907_1085_fsp.sph diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/test b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/test new file mode 100644 index 00000000000..0cbc3cc95fd --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/test @@ -0,0 +1,20 @@ +sp_0053.sph +sp_0082.sph +sp_0084.sph +sp_0088.sph +sp_0681.sph +sp_0699.sph +sp_0776.sph +sp_0857.sph +sp_1031.sph +sp_1100.sph +sp_1148.sph +sp_1156.sph +sp_1186.sph +sp_1212.sph +sp_1345.sph +sp_1435.sph +sp_1578.sph +sp_1648.sph +sp_1807.sph +sp_1847.sph diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/train b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/train new file mode 100644 index 00000000000..2c936072534 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/train @@ -0,0 +1,80 @@ +sp_0085.sph +sp_0096.sph +sp_0098.sph +sp_0100.sph +sp_0291.sph +sp_0713.sph +sp_0724.sph +sp_0726.sph +sp_0731.sph +sp_0733.sph +sp_0753.sph +sp_0788.sph +sp_0826.sph +sp_0831.sph +sp_0836.sph +sp_0841.sph +sp_0850.sph +sp_0855.sph +sp_0892.sph +sp_0899.sph +sp_0910.sph +sp_0917.sph +sp_0919.sph +sp_0923.sph +sp_0945.sph +sp_0950.sph +sp_0951.sph +sp_0992.sph +sp_0997.sph +sp_1013.sph +sp_1039.sph +sp_1044.sph +sp_1045.sph +sp_1058.sph +sp_1060.sph +sp_1063.sph +sp_1081.sph +sp_1106.sph +sp_1122.sph +sp_1140.sph +sp_1175.sph +sp_1195.sph +sp_1198.sph +sp_1231.sph +sp_1234.sph +sp_1255.sph +sp_1260.sph +sp_1261.sph +sp_1262.sph +sp_1264.sph +sp_1266.sph +sp_1273.sph +sp_1275.sph +sp_1284.sph +sp_1286.sph +sp_1304.sph +sp_1308.sph +sp_1333.sph +sp_1341.sph +sp_1353.sph +sp_1368.sph +sp_1379.sph +sp_1384.sph +sp_1449.sph +sp_1463.sph +sp_1574.sph +sp_1740.sph +sp_1759.sph +sp_1849.sph +sp_1908.sph +sp_1915.sph +sp_1918.sph +sp_1974.sph +sp_1976.sph +sp_1988.sph +sp_2000.sph +sp_2056.sph +sp_2070.sph +sp_2091.sph +sp_2101.sph diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/spron.pl b/egs/fisher_callhome_spanish/s5_gigaword/local/spron.pl new file mode 100755 index 00000000000..03193384670 --- /dev/null +++ 
b/egs/fisher_callhome_spanish/s5_gigaword/local/spron.pl @@ -0,0 +1,304 @@ +#!/usr/bin/env perl + +# Oct 21, 2015 : Gaurav Kumar (Johns Hopkins University) +# GNU General Public License, v3.0 +# +# This script was modified under GPL and is being distributed with +# Kaldi. It requires the preference and rule files +# (under LDC copyright) from LDC96L16. The main changes were +# - Outdated usage of perl conventions updated @_ => $_ or @A +# - This script no longer needs the preference and rule files to +# be in the same directory as this script. +# - Accepts tokens from instead of <> + +# --- Retained previous version information ---------------------------- +# spron.pl Version 0.1 Jan. 11 1995 +# Written by Zhibiao Wu, LDC, wzb@unagi.cis.upenn.edu +# This program needs the basic_rules file to run. The rules must be sorted +# in alphabetical order. The most specific rules should precede the more +# general ones. The conventions used in the basic rules are the same as +# regular expressions used in Perl. + +# Revised history: Feb. 10 1995 + +# The file "preferences" (assumed to be in your current directory) +# gives an "oracle" of correct pronunciations that override the +# machine-generated ones. + +# slightly changed 97/09/05 robertm: +# - look for basic_rules and preferences in $PWD instead of ~wzb/... +# - use next to shortcut loop instead of if/else +# - added a bit of documentation, without really trying to decipher this thing +# ----------------------------------------------------------------------- + +use utf8; +binmode(STDIN, ":utf8"); +binmode(STDOUT, ":utf8"); + +$vfile = ""; +$preference_file = ""; +$rules_file = ""; +$print_input = 0; +if ($#ARGV < 1) { + # Print Usage + print "Usage : local/spron.pl pref-file rules-file \n"; + exit 1; +} else { + $preference_file = $ARGV[0]; + $rules_file = $ARGV[1]; + if ($#ARGV > 1) { + $vfile = $ARGV[2]; + } + if ($#ARGV > 2) { + $print_input = 1; + } +} + +$rule_num = 0; +$previous = ""; +if ($vfile ne "") { + open(VF, $vfile) || die "Can't find file $vfile!\n"; + while () { + chop; + @A = split(//); + if (($A[0] ne '#') && ($_ ne "")) { + if (/(\S+)\s*->\s*(\S*)\s*:\s*(\S*)\s*__\s*(\S*)\s*(#?)/) { + $head[$rule_num] = $1; + $end[$rule_num] = $2; + $pre[$rule_num] = $3; + if ($4 =~ /#/) { + $nex[$rule_num] = ""; + $some[$rule_num] = $4; + } else { + $nex[$rule_num] = $4; + $some[$rule_num] = $5; + } + if ($previous ne substr($head[$rule_num],0,1)) { + $first{$head[$rule_num]} = $rule_num; + $last{$previous} = $rule_num - 1; + } + $previous = substr($head[$rule_num++],0,1); + } else { + print "Rule format error: Cannot parse $_\n"; + exit(1); + } + } + } + $last{$previous} = $rule_num - 1; + + close(VF); +} + +open(PF, $preference_file) || die "Can't read `preferences' file"; +binmode(PF, ":iso88591"); +while () { + chop; + if ($_ ne "") { + @A = split; + $pron{$A[0]} = $A[1]; + $stre{$A[0]} = $A[2]; + } +} + +$previous = ""; +$brule_num = 0; +open(BF, $rules_file) || die "Can't read `basic_rules' file"; +binmode(BF, ":iso88591"); +while () { + chop; + @A = split(//); + if (($A[0] ne '#') && ($_ ne "")) { + if (/(\S+)\s*->\s*(\S*)\s*:\s*(\S*)\s*__\s*(\S*)\s*(#?)/) { + $bhead[$brule_num] = $1; + $bend[$brule_num] = $2; + $bpre[$brule_num] = $3; + if ($4 =~ /#/) { + $bnex[$brule_num] = ""; + $bsome[$brule_num] = $4; + } else { + $bnex[$brule_num] = $4; + $bsome[$brule_num] = $5; + } + if ($previous ne substr($bhead[$brule_num],0,1)) { + $bfirst{substr($bhead[$brule_num],0,1)} = $brule_num; + $blast{$previous} = $brule_num - 1; + } + $previous = 
substr($bhead[$brule_num++],0,1); + } else { + print "Rule format error in file basic_rules: Cannot parse $_\n"; + exit(1); + } + } +} +$blast{$previous} = $brule_num - 1; +close(BF); + +if ($brule_num == 0) { + print "No basic rules, Program exit!\n"; + exit(1); +} + +while(){ + next if ((/^#/) || (/^\s*$/) ); + chop; + if ($print_input) { + print $_, "\t"; + } + if ($pron{$_}) { + # print answer from preferences and skip to next word + print "$pron{$_}\t$stre{$_}\n"; + next; + } + $original = $_; + tr/A-ZÁÉÍÓÚÏÜÑ/a-záéíóúïüñ/; + $orig = "#" . $_ . "#"; + + @l = (); + + push(@l,split("",$orig)); + + @pron = &transfer(1); + + foreach (@pron) { + $a = $_; + y/aeiouáéíóú//cd; + if ($_ eq "") { + print "#No stressable vowel in $original\n"; + } else { + s/[aeiou]/0/go; + s/[áéíóú]/1/go; + if (!/1/) { + if(length() == 1){ + s/\b./1/o; + } elsif($l[$#l - 1] =~ /[aeiouns]/o){ + s/00\b/10/o; + } else { + s/0\b/1/o; + } + } + + $a =~ s/á/a/g; + $a =~ s/é/e/g; + $a =~ s/í/i/g; + $a =~ s/ó/o/g; + $a =~ s/ú/u/g; + + print "$a\t$_\n"; + } + } +} + +sub transfer{ + local($_) = @_; + local(@p) = (); + local($s) = 0; + local($over) = 0; + local($i,$j,$k) = (0,0,0); + + if ($_ >= length($orig) - 1) { + push(@p, ""); + return(@p); + } else { + + if ($vfile ne "") { + for ($i= $first{substr($orig, $_, 1)}; + $i <= $last{substr($orig, $_, 1)} ; $i++) { + if (&matchv($_,$i)) { + $s = $_ + length($head[$i]); + foreach $w (&transfer($s)) { + push(@p, $end[$i] . $w); + if ($some[$i] ne "") { + $over = 0; + } else { + $over = 1; + } + } + } + } + } + + if ($over == 0 ) { + $i = $bfirst{substr($orig, $_, 1)}; + while (($i <= $blast{substr($orig, $_, 1)}) && ($over == 0)) { + if (&matchb($_,$i)) { + $over = 1; + $s = $_ + length($bhead[$i]); + foreach $w (&transfer($s)) { + push(@p, $bend[$i] . $w); + } + } + $i++; + } + if ($over == 0) { + $s = $_ + 1; + foreach $w (&transfer($s)) { + push(@p, substr($orig,$_,1) . $w); + } + } + } + + return(@p); + } +} + +sub matchv { + $h = $head[$_[1]]; + $p = $pre[$_[1]]; + $n = $nex[$_[1]]; + + return(&match($_[0],$h,$p,$n)); + +} + +sub matchb { + $h = $bhead[$_[1]]; + $p = $bpre[$_[1]]; + $n = $bnex[$_[1]]; + + return(&match($_[0],$h,$p,$n)); + +} + +sub match { + + if (substr($orig, $_[0], length($_[1])) eq $_[1]) { + return ( &match_n($_[0] + length($_[1]) - 1, $_[3]) && + &match_p($_[0], $_[2])); + } else { + return (0); + } +} + +sub match_p { + local($a) = $_[0]; + local($b) = $_[1]; + local($_); + + if ($b eq "" ) { + return (1); + } else { + $_ = substr($orig, 0, $a) . "!"; + if (/($b)!/) { + return(1); + } else { + return(0); + } + } +} + +sub match_n { + local($a) = $_[0]; + local($b) = $_[1]; + local($_); + + if ($b eq "" ) { + return (1); + } else { + $_ = "!" . substr($orig, $a + 1, length($orig) - $a - 1); + if (/!($b)/) { + return(1); + } else { + return(0); + } + } +} diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/subset_data_prep.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/subset_data_prep.sh new file mode 100755 index 00000000000..9f5855d56c4 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/subset_data_prep.sh @@ -0,0 +1,164 @@ +#!/bin/bash +# +# Copyright 2014 Gaurav Kumar. Apache 2.0 +# The input is a subset of the dataset in use. (*.sph files) +# In addition the transcripts are needed as well. +# This script is only called internally and should not be +# used for any other purpose. A similar script for general usage +# is local/fsp_data_prep.sh +# To be run from one directory above this script. 
+ +stage=0 + +export LC_ALL=C + + +if [ $# -lt 4 ]; then + echo "Arguments should be the location of the Spanish Fisher Speech and Transcript Directories and the name of this partition +, and a list of files that belong to this partition . see ../run.sh for example." + exit 1; +fi + +subset=$3 +dir=`pwd`/data/local/$subset/data +mkdir -p $dir +local=`pwd`/local +utils=`pwd`/utils +tmpdir=`pwd`/data/local/tmp +mkdir -p $tmpdir + +. ./path.sh || exit 1; # Needed for KALDI_ROOT +export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin +sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe +if [ ! -x $sph2pipe ]; then + echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; + exit 1; +fi +cd $dir + +# Make directory of links to the WSJ disks such as 11-13.1. This relies on the command +# line arguments being absolute pathnames. +rm -r links/ 2>/dev/null +mkdir links/ +mkdir links/speech +mkdir links/transcripts +if [ ! -f $4 ]; then + echo "Please specify a valid parition file. Could not find $4" + exit 1; +fi +cat $4 | sed 's:.*/::g' | \ +xargs -I % find $1/ -name %* | xargs -I % echo cp % links/ + +# Basic spot checks to see if we got the data that we needed +if [ ! -d links/LDC2010S01 -o ! -d links/LDC2010T04 ]; +then + echo "The speech and the data directories need to be named LDC2010S01 and LDC2010T04 respecti +vely" + exit 1; +fi + +if [ ! -d links/LDC2010S01/DISC1/data/speech -o ! -d links/LDC2010S01/DISC2/data/speech ]; +then + echo "Disc 1 and 2 directories missing or not properly organised within the speech data dir" + echo "Typical format is LDC2010S01/DISC?/data/speech" + exit 1; +fi + +#Check the transcripts directories as well to see if they exist +if [ ! -d links/LDC2010T04/data/transcripts ]; +then + echo "Transcript directories missing or not properly organised" + echo "Typical format is LDC2010T04/data/transcripts" + exit 1; +fi + +speech_d1=$dir/links/LDC2010S01/DISC1/data/speech +speech_d2=$dir/links/LDC2010S01/DISC2/data/speech +transcripts=$dir/links/LDC2010T04/data/transcripts + +fcount_d1=`find ${speech_d1} -iname '*.sph' | wc -l` +fcount_d2=`find ${speech_d2} -iname '*.sph' | wc -l` +fcount_t=`find ${transcripts} -iname '*.tdf' | wc -l` +#TODO:it seems like not all speech files have transcripts +#Now check if we got all the files that we needed +if [ $fcount_d1 != 411 -o $fcount_d2 != 408 -o $fcount_t != 819 ]; +then + echo "Incorrect number of files in the data directories" + echo "DISC1 and DISC2 should contain 411 and 408 .sph files respectively" + echo "The transcripts should contain 819 files" + exit 1; +fi + +if [ $stage -le 0 ]; then + #Gather all the speech files together to create a file list + #TODO: Train and test split might be required + ( + find $speech_d1 -iname '*.sph'; + find $speech_d2 -iname '*.sph'; + ) > $tmpdir/train_sph.flist + + #Get all the transcripts in one place + find $transcripts -iname '*.tdf' > $tmpdir/train_transcripts.flist +fi + +if [ $stage -le 1 ]; then + $local/fsp_make_trans.pl $tmpdir + mkdir -p $dir/train_all + mv $tmpdir/reco2file_and_channel $dir/train_all/ +fi + +if [ $stage -le 2 ]; then + sort $tmpdir/text.1 | grep -v '((' | \ + awk '{if (NF > 1){ print; }}' | \ + sed 's:<\s*[/]*\s*\s*for[ei][ei]g[nh]\s*\w*>::g' | \ + sed 's:\([^<]*\)<\/lname>:\1:g' | \ + sed 's:::g' | \ + sed 's:[^<]*<\/laugh>:[laughter]:g' | \ + sed 's:<\s*cough[\/]*>:[noise]:g' | \ + sed 's::[noise]:g' | \ + sed 's::[noise]:g' | \ + sed 's::[noise]:g' | \ + sed 's:[^<]*<\/background>:[noise]:g' | \ + sed -r 's:<[/]?background[/]?>:[noise]:g' | \ + #One 
more time to take care of nested stuff + sed 's:[^<]*<\/laugh>:[laughter]:g' | \ + sed -r 's:<[/]?laugh[/]?>:[laughter]:g' | \ + #now handle the exceptions, find a cleaner way to do this? + sed 's:::g' | \ + sed 's:::g' | \ + sed 's:foreign>::g' | \ + sed 's:>::g' | \ + #How do you handle numbers? + grep -v '()' | \ + #Now go after the non-printable characters + sed -r 's:¿::g' > $tmpdir/text.2 + cp $tmpdir/text.2 $dir/train_all/text + + #Create segments file and utt2spk file + ! cat $dir/train_all/text | perl -ane 'm:([^-]+)-([AB])-(\S+): || die "Bad line $_;"; print "$1-$2-$3 $1-$2\n"; ' > $dir/train_all/utt2spk \ + && echo "Error producing utt2spk file" && exit 1; + + cat $dir/train_all/text | perl -ane 'm:((\S+-[AB])-(\d+)-(\d+))\s: || die; $utt = $1; $reco = $2; + $s = sprintf("%.2f", 0.01*$3); $e = sprintf("%.2f", 0.01*$4); print "$utt $reco $s $e\n"; ' >$dir/train_all/segments + + $utils/utt2spk_to_spk2utt.pl <$dir/train_all/utt2spk > $dir/train_all/spk2utt +fi + +if [ $stage -le 3 ]; then + cat $tmpdir/train_sph.flist | perl -ane 'm:/([^/]+)\.sph$: || die "bad line $_; "; print "$1 $_"; ' > $tmpdir/sph.scp + cat $tmpdir/sph.scp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2);}' | \ + sort -k1,1 -u > $dir/train_all/wav.scp || exit 1; +fi + +if [ $stage -le 4 ]; then + # Build the speaker to gender map, the temporary file with the speaker in gender information is already created by fsp_make_trans.pl. + cat $tmpdir/spk2gendertmp | sort | uniq > $dir/train_all/spk2gender +fi + +echo "Fisher Spanish Data preparation succeeded." + +exit 1; + diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/train_get_1_best.py b/egs/fisher_callhome_spanish/s5_gigaword/local/train_get_1_best.py new file mode 100755 index 00000000000..ce83fa8c8aa --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/train_get_1_best.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python +# Copyright 2014 Gaurav Kumar. 
Apache 2.0 + +import os +import sys + +files = [ +open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-1/exp/tri5a/decode_test/scoring/13.tra'), +open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-2/exp/tri5a/decode_test/scoring/13.tra'), +open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-3/exp/tri5a/decode_test/scoring/13.tra'), +open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-4/exp/tri5a/decode_test/scoring/13.tra'), +open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-5/exp/tri5a/decode_test/scoring/13.tra'), +open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-6/exp/tri5a/decode_test/scoring/13.tra'), +open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-7/exp/tri5a/decode_test/scoring/13.tra'), +open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-8/exp/tri5a/decode_test/scoring/13.tra'), +open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-9/exp/tri5a/decode_test/scoring/13.tra'), +open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-10/exp/tri5a/decode_test/scoring/13.tra')] + +def findTranscription(timeDetail): + + for file1 in files: + file1.seek(0,0) + for line in file1: + lineComp = line.split() + if lineComp[0] == timeDetail: + return " ".join(lineComp[1:]) + # No result found + return -1 + + +wordsFile = open('exp/tri5a/graph/words.txt') +words = {} + +# Extract word list +for line in wordsFile: + lineComp = line.split() + words[int(lineComp[1])] = lineComp[0].strip() + +# Now read list of files in conversations +fileList = [] +#conversationList = open('/export/a04/gkumar/corpora/fishcall/joshkal-splits/provisional_dev') +conversationList = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/train') +for line in conversationList: + line = line.strip() + line = line[:-4] + fileList.append(line) + +# IN what order were the conversations added to the spanish files? +# TODO: Make sure they match the order in which these english files are being written + +# Now get timing information to concatenate the ASR outputs +if not os.path.exists('exp/tri5a/one-best/train'): + os.makedirs('exp/tri5a/one-best/train') + +#provFile = open('/export/a04/gkumar/corpora/fishcall/fisher_provisional_dev.es', 'w+') +provFile = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/asr.train', 'w+') +for item in fileList: + timingFile = open('/export/a04/gkumar/corpora/fishcall/fisher/tim/' + item + '.es') + newFile = open('exp/tri5a/one-best/train/' + item + '.es', 'w+') + for line in timingFile: + timeInfo = line.split() + mergedTranslation = "" + for timeDetail in timeInfo: + #Locate this in ASR dev/test, this is going to be very slow + tmp = findTranscription(timeDetail) + if tmp != -1: + mergedTranslation = mergedTranslation + " " + tmp + mergedTranslation = mergedTranslation.strip() + transWords = [words[int(x)] for x in mergedTranslation.split()] + newFile.write(" ".join(transWords) + "\n") + provFile.write(" ".join(transWords) + "\n") + newFile.close() +provFile.close() + + + + + + diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/train_get_lattices.py b/egs/fisher_callhome_spanish/s5_gigaword/local/train_get_lattices.py new file mode 100755 index 00000000000..b9f906b27da --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/train_get_lattices.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python +# Copyright 2014 Gaurav Kumar. 
Apache 2.0 + +from __future__ import print_function +import os +import sys +import subprocess + +latticeLocation = {1:"/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-1/latjosh-2/lattices-pushed/", +2:"/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-2/latjosh-2/lattices-pushed/", +3:"/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-3/latjosh-2/lattices-pushed/", +4:"/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-4/latjosh-2/lattices-pushed/", +5:"/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-5/latjosh-2/lattices-pushed/", +6:"/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-6/latjosh-2/lattices-pushed/", +7:"/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-7/latjosh-2/lattices-pushed/", +8:"/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-8/latjosh-2/lattices-pushed/", +9:"/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-9/latjosh-2/lattices-pushed/", +10:"/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-10/latjosh-2/lattices-pushed/"} + +latticeDict = {} + +for key,location in latticeLocation.items(): + for root, dirs, filenames in os.walk(location): + for f in filenames: + latticeDict[f] = str(key) + +tmpdir = 'data/local/data/tmp/lattmp' +if not os.path.exists(tmpdir): + os.makedirs(tmpdir) +invalidplfdir = 'data/local/data/tmp/invalidplf' +if not os.path.exists(invalidplfdir): + os.makedirs(invalidplfdir) +else: + os.system("rm " + invalidplfdir + "/*") + +def latticeConcatenate(lat1, lat2): + ''' + Concatenates lattices, writes temporary results to tmpdir + ''' + if lat1 == "": + if os.path.exists('rm ' + tmpdir + '/tmp.lat'): + os.system('rm ' + tmpdir + '/tmp.lat') + return lat2 + else: + proc = subprocess.Popen(['fstconcat', lat1, lat2, (tmpdir + '/tmp.lat')]) + proc.wait() + return tmpdir + '/tmp.lat' + + +def findLattice(timeDetail): + ''' + Finds the lattice corresponding to a time segment + ''' + searchKey = timeDetail + '.lat' + if searchKey in latticeDict: + return "/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-" + latticeDict[searchKey] + "/latjosh-2/lattices-pushed/" + searchKey + else: + return -1 + + +# Now read list of files in conversations +fileList = [] +conversationList = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/train') +for line in conversationList: + line = line.strip() + line = line[:-4] + fileList.append(line) + +# IN what order were the conversations added to the spanish files? 
+# Now get timing information to concatenate the ASR outputs + +provFile = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/asr.train.plf', 'w+') +lineNo = 1 +invalidPLF = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/invalidPLF', 'w+') +blankPLF = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/blankPLF', 'w+') +rmLines = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/removeLines', 'w+') +for item in fileList: + timingFile = open('/export/a04/gkumar/corpora/fishcall/fisher/tim/' + item + '.es') + for line in timingFile: + timeInfo = line.split() + + # For utterances that are concatenated in the translation file, + # the corresponding FSTs have to be translated as well + mergedTranslation = "" + for timeDetail in timeInfo: + tmp = findLattice(timeDetail) + if tmp != -1: + # Concatenate lattices + mergedTranslation = latticeConcatenate(mergedTranslation, tmp) + + if mergedTranslation != "": + + # Sanjeev's Recipe : Remove epsilons and topo sort + finalFST = tmpdir + "/final.fst" + os.system("fstrmepsilon " + mergedTranslation + " | fsttopsort - " + finalFST) + + # Now convert to PLF + proc = subprocess.Popen('/export/a04/gkumar/corpora/fishcall/bin/fsm2plf.sh /export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-matt/data/lang/words.clean.txt ' + finalFST, stdout=subprocess.PIPE, shell=True) + PLFline = proc.stdout.readline() + finalPLFFile = tmpdir + "/final.plf" + finalPLF = open(finalPLFFile, "w+") + finalPLF.write(PLFline) + finalPLF.close() + + # now check if this is a valid PLF, if not write it's ID in a + # file so it can be checked later + proc = subprocess.Popen("/export/a04/gkumar/moses/mosesdecoder/checkplf < " + finalPLFFile + " 2>&1 | awk 'FNR == 2 {print}'", stdout=subprocess.PIPE, shell=True) + line = proc.stdout.readline() + print("{} {}".format(line, lineNo)) + if line.strip() != "PLF format appears to be correct.": + os.system("cp " + finalFST + " " + invalidplfdir + "/" + timeInfo[0]) + invalidPLF.write(invalidplfdir + "/" + timeInfo[0] + "\n") + rmLines.write("{}\n".format(lineNo)) + else: + provFile.write(PLFline) + else: + blankPLF.write(timeInfo[0] + "\n") + rmLines.write("{}\n".format(lineNo)) + # Now convert to PLF + lineNo += 1 + +provFile.close() +invalidPLF.close() +blankPLF.close() +rmLines.close() diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh new file mode 100755 index 00000000000..29fbeebace6 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +stage=-2 +num_words_pocolm=110000 +prune_size=1000000 + +. ./path_venv.sh +. ./cmd.sh +. 
./utils/parse_options.sh + +set -euo pipefail + +export POCOLM_ROOT=$(cd $KALDI_ROOT/tools/pocolm/; pwd -P) +export PATH=$PATH:$POCOLM_ROOT/scripts + +textdir=$1 +pocolm_dir=$2 + + +if [ $stage -le -2 ];then + if [ -e "$textdir"/unigram_weights ]; then + rm "$textdir"/unigram_weights + fi + + if [ -e "$pocolm_dir" ]; then + rm -r "$pocolm_dir" + fi + + bash local/pocolm_cust.sh --num-word "$num_words_pocolm" --lm-dir "$pocolm_dir"/lm \ + --arpa-dir "$pocolm_dir"/arpa --textdir "$textdir" +fi + +if [ $stage -le -1 ];then + prune_lm_dir.py --target-num-ngrams=${prune_size} --max-memory=8G "$pocolm_dir"/lm/"$num_words_pocolm"_3.pocolm "$pocolm_dir"/lm/"$num_words_pocolm"_3.pocolm_pruned + format_arpa_lm.py --max-memory=8G "$pocolm_dir"/lm/"$num_words_pocolm"_3.pocolm_pruned | gzip -c > "$pocolm_dir"/arpa/"$num_words_pocolm"_3.pocolm_pruned_${prune_size}.arpa.gz +fi + + +exit 0; diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/train_process_oracle.py b/egs/fisher_callhome_spanish/s5_gigaword/local/train_process_oracle.py new file mode 100755 index 00000000000..3f6444da294 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/train_process_oracle.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python +# Copyright 2014 Gaurav Kumar. Apache 2.0 + +import os +import sys + +files = [ +open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-1/exp/tri5a/decode_test/oracle/oracle.tra'), +open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-2/exp/tri5a/decode_test/oracle/oracle.tra'), +open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-3/exp/tri5a/decode_test/oracle/oracle.tra'), +open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-4/exp/tri5a/decode_test/oracle/oracle.tra'), +open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-5/exp/tri5a/decode_test/oracle/oracle.tra'), +open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-6/exp/tri5a/decode_test/oracle/oracle.tra'), +open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-7/exp/tri5a/decode_test/oracle/oracle.tra'), +open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-8/exp/tri5a/decode_test/oracle/oracle.tra'), +open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-9/exp/tri5a/decode_test/oracle/oracle.tra'), +open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-10/exp/tri5a/decode_test/oracle/oracle.tra')] + +def findTranscription(timeDetail): + + for file1 in files: + file1.seek(0,0) + for line in file1: + lineComp = line.split() + if lineComp[0] == timeDetail: + return " ".join(lineComp[1:]) + # No result found + return -1 + + +wordsFile = open('exp/tri5a/graph/words.txt') +words = {} + +# Extract word list +for line in wordsFile: + lineComp = line.split() + words[int(lineComp[1])] = lineComp[0].strip() + +# Now read list of files in conversations +fileList = [] +#conversationList = open('/export/a04/gkumar/corpora/fishcall/joshkal-splits/provisional_dev') +conversationList = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/train') +for line in conversationList: + line = line.strip() + line = line[:-4] + fileList.append(line) + +# IN what order were the conversations added to the spanish files? 
+# TODO: Make sure they match the order in which these english files are being written + +# Now get timing information to concatenate the ASR outputs +if not os.path.exists('exp/tri5a/one-best/train'): + os.makedirs('exp/tri5a/one-best/train') + +#provFile = open('/export/a04/gkumar/corpora/fishcall/fisher_provisional_dev.es', 'w+') +provFile = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/asr.train.oracle', 'w+') +for item in fileList: + timingFile = open('/export/a04/gkumar/corpora/fishcall/fisher/tim/' + item + '.es') + newFile = open('exp/tri5a/one-best/train/' + item + '.es', 'w+') + for line in timingFile: + timeInfo = line.split() + mergedTranslation = "" + for timeDetail in timeInfo: + #Locate this in ASR dev/test, this is going to be very slow + tmp = findTranscription(timeDetail) + if tmp != -1: + mergedTranslation = mergedTranslation + " " + tmp + mergedTranslation = mergedTranslation.strip() + transWords = [words[int(x)] for x in mergedTranslation.split()] + newFile.write(" ".join(transWords) + "\n") + provFile.write(" ".join(transWords) + "\n") + newFile.close() +provFile.close() + + + + + + diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/wer_output_filter b/egs/fisher_callhome_spanish/s5_gigaword/local/wer_output_filter new file mode 100755 index 00000000000..4fce42945b3 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/wer_output_filter @@ -0,0 +1,5 @@ +#!/bin/sed -f +s:\[laughter\]::g +s:\[noise\]::g +s:\[oov\]::g +s:::g diff --git a/egs/fisher_callhome_spanish/s5_gigaword/path.sh b/egs/fisher_callhome_spanish/s5_gigaword/path.sh new file mode 100755 index 00000000000..2fc3de37406 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/path.sh @@ -0,0 +1,13 @@ +export KALDI_ROOT=`pwd`/../../../../kaldi +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/tools/config/common_path.sh +export LD_LIBRARY_PATH=/home/dpovey/libs + +export SPARROWHAWK_ROOT=$KALDI_ROOT/tools/sparrowhawk +export PATH=$SPARROWHAWK_ROOT/bin:$PATH +export LC_ALL=C.UTF-8 +export LANG=C.UTF-8 + + diff --git a/egs/fisher_callhome_spanish/s5_gigaword/path_venv.sh b/egs/fisher_callhome_spanish/s5_gigaword/path_venv.sh new file mode 100755 index 00000000000..80edbbaf69a --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/path_venv.sh @@ -0,0 +1,13 @@ +export KALDI_ROOT=`pwd`/../../../../kaldi +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 +. 
$KALDI_ROOT/tools/config/common_path.sh +export LD_LIBRARY_PATH=/home/dpovey/libs + +export SPARROWHAWK_ROOT=$KALDI_ROOT/tools/sparrowhawk +export PATH=$SPARROWHAWK_ROOT/bin:$PATH +export LC_ALL=C.UTF-8 +export LANG=C.UTF-8 + +source ~/anaconda/bin/activate py36 diff --git a/egs/fisher_callhome_spanish/s5_gigaword/rnnlm b/egs/fisher_callhome_spanish/s5_gigaword/rnnlm new file mode 120000 index 00000000000..fb754622d5e --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/rnnlm @@ -0,0 +1 @@ +../../wsj/s5/rnnlm \ No newline at end of file diff --git a/egs/fisher_callhome_spanish/s5_gigaword/run.sh b/egs/fisher_callhome_spanish/s5_gigaword/run.sh new file mode 100755 index 00000000000..5f7068072f3 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/run.sh @@ -0,0 +1,299 @@ +#!/bin/bash +# +# Copyright 2018 Nagendra Goel, Saikiran Valluri Apache 2.0 +# Copyright 2014 Gaurav Kumar. Apache 2.0 +# Recipe for Fisher/Callhome-Spanish + +stage=-1 +lmstage=-2 +train_sgmm2=false + +# call the next line with the directory where the Spanish Fisher data is +# (the values below are just an example). +sfisher_speech=/export/corpora/LDC/LDC2010S01 +sfisher_transcripts=/export/corpora/LDC/LDC2010T04 +spanish_lexicon=/export/corpora/LDC/LDC96L16 +split=local/splits/split_fisher + +callhome_speech=/export/corpora/LDC/LDC96S35 +callhome_transcripts=/export/corpora/LDC/LDC96T17 +split_callhome=local/splits/split_callhome + +gigaword_datapath=/export/c03/svalluri/Spanish_gigaword/data # Path to the download of Gigaword data +rnnlm_workdir=/export/c03/svalluri/workdir_rnnlm # Work path for entire Gigaword LM and text processing, should be + # large free spae and easy IO access. +mfccdir=`pwd`/mfcc + +. ./cmd.sh +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + +set -eou pipefail + +if [ $stage -le -1 ]; then + local/fsp_data_prep.sh $sfisher_speech $sfisher_transcripts + local/callhome_data_prep.sh $callhome_speech $callhome_transcripts + + # The lexicon is created using the LDC spanish lexicon, the words from the + # fisher spanish corpus. Additional (most frequent) words are added from the + # ES gigaword corpus to bring the total to 64k words. The ES frequency sorted + # wordlist is downloaded if it is not available. + local/fsp_prepare_dict.sh $spanish_lexicon + + # Added c,j, v to the non silences phones manually + utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang + + # Make sure that you do not use your test and your dev sets to train the LM + # Some form of cross validation is possible where you decode your dev/set based on an + # LM that is trained on everything but that that conversation + # When in doubt about what your data partitions should be use local/fsp_ideal_data_partitions.pl + # to get the numbers. Depending on your needs, you might have to change the size of + # the splits within that file. 
The default paritions are based on the Kaldi + Joshua + # requirements which means that I have very large dev and test sets + local/fsp_train_lms.sh $split + local/fsp_create_test_lang.sh + + utils/fix_data_dir.sh data/local/data/train_all + + steps/make_mfcc.sh --nj 20 --cmd "$train_cmd" data/local/data/train_all exp/make_mfcc/train_all $mfccdir || exit 1; + + utils/fix_data_dir.sh data/local/data/train_all + utils/validate_data_dir.sh data/local/data/train_all + + cp -r data/local/data/train_all data/train_all + + # For the CALLHOME corpus + utils/fix_data_dir.sh data/local/data/callhome_train_all + + steps/make_mfcc.sh --nj 20 --cmd "$train_cmd" data/local/data/callhome_train_all exp/make_mfcc/callhome_train_all $mfccdir || exit 1; + + utils/fix_data_dir.sh data/local/data/callhome_train_all + utils/validate_data_dir.sh data/local/data/callhome_train_all + + cp -r data/local/data/callhome_train_all data/callhome_train_all + + # Creating data partitions for the pipeline + # We need datasets for both the ASR and SMT system + # We have 257455 utterances left, so the partitions are roughly as follows + # ASR Train : 100k utterances + # ASR Tune : 17455 utterances + # ASR Eval : 20k utterances + # MT Train : 100k utterances + # MT Tune : Same as the ASR eval set (Use the lattices from here) + # MT Eval : 20k utterances + # The dev and the test sets need to be carefully chosen so that there is no conversation/speaker + # overlap. This has been setup and the script local/fsp_ideal_data_partitions provides the numbers that are needed below. + # As noted above, the LM has not been trained on the dev and the test sets. + #utils/subset_data_dir.sh --first data/train_all 158126 data/dev_and_test + #utils/subset_data_dir.sh --first data/dev_and_test 37814 data/asr_dev_and_test + #utils/subset_data_dir.sh --last data/dev_and_test 120312 data/mt_train_and_test + #utils/subset_data_dir.sh --first data/asr_dev_and_test 17662 data/dev + #utils/subset_data_dir.sh --last data/asr_dev_and_test 20152 data/test + #utils/subset_data_dir.sh --first data/mt_train_and_test 100238 data/mt_train + #utils/subset_data_dir.sh --last data/mt_train_and_test 20074 data/mt_test + #rm -r data/dev_and_test + #rm -r data/asr_dev_and_test + #rm -r data/mt_train_and_test + + local/create_splits.sh $split + local/callhome_create_splits.sh $split_callhome +fi + +if [ $stage -le 0 ]; then + mkdir -p "$rnnlm_workdir"/gigaword_rawtext + local/flatten_gigaword/flatten_all_gigaword.sh "$gigaword_datapath" "$rnnlm_workdir"/flattened_gigaword_corpus 24 + cat "$rnnlm_workdir"/flattened_gigaword_corpus/*.flat > "$rnnlm_workdir"/gigaword_rawtext/in.txt + local/clean_txt_dir.sh "$rnnlm_workdir"/gigaword_rawtext/ \ + "$rnnlm_workdir"/normalised_gigaword_corpus/ + mkdir -p "$rnnlm_workdir"/text_lm + cut -d " " -f 2- data/train/text > "$rnnlm_workdir"/text_lm/train.txt + cut -d " " -f 2- data/dev2/text > "$rnnlm_workdir"/text_lm/dev.txt # For RNNLM and POCOLM training we use dev2/text as dev file. 
+ cp "$rnnlm_workdir"/normalised_gigaword_corpus/text_normalized "$rnnlm_workdir"/text_lm/spanish_gigaword_normalised.txt +fi + + +if [ $stage -le 1 ]; then + num_words_pocolm=110000 + local/train_pocolm.sh --stage $lmstage --num-words-pocolm 110000 "$rnnlm_workdir"/text_lm/ "$rnnlm_workdir"/pocolm + cat "$rnnlm_workdir"/pocolm/lm/"$num_words_pocolm"_3.pocolm/words.txt > "$rnnlm_workdir"/rnnlm_wordlist.txt + cut -f 1 -d " " data/lang/words.txt >> "$rnnlm_workdir"/rnnlm_wordlist.txt + cat "$rnnlm_workdir"/rnnlm_wordlist.txt | sort | uniq > "$rnnlm_workdir"/rnnlm_wordlist.txt.uniq + local/rnnlm.sh --stage $lmstage --dir "$rnnlm_workdir"/rnnlm --pocolm-dir "$rnnlm_workdir"/pocolm/lm/100000_3.pocolm \ + --wordslist "$rnnlm_workdir"/rnnlm_wordlist.txt.uniq --text "$rnnlm_workdir"/text_lm --text-dir "$rnnlm_workdir"/text_lm +fi + +if [ $stage -le 2 ]; then + # Now compute CMVN stats for the train, dev and test subsets + steps/compute_cmvn_stats.sh data/dev exp/make_mfcc/dev $mfccdir + steps/compute_cmvn_stats.sh data/test exp/make_mfcc/test $mfccdir + steps/compute_cmvn_stats.sh data/dev2 exp/make_mfcc/dev2 $mfccdir + #steps/compute_cmvn_stats.sh data/mt_train exp/make_mfcc/mt_train $mfccdir + #steps/compute_cmvn_stats.sh data/mt_test exp/make_mfcc/mt_test $mfccdir + + #n=$[`cat data/train_all/segments | wc -l` - 158126] + #utils/subset_data_dir.sh --last data/train_all $n data/train + steps/compute_cmvn_stats.sh data/train exp/make_mfcc/train $mfccdir + + steps/compute_cmvn_stats.sh data/callhome_dev exp/make_mfcc/callhome_dev $mfccdir + steps/compute_cmvn_stats.sh data/callhome_test exp/make_mfcc/callhome_test $mfccdir + steps/compute_cmvn_stats.sh data/callhome_train exp/make_mfcc/callhome_train $mfccdir + + # Again from Dan's recipe : Reduced monophone training data + # Now-- there are 1.6 million utterances, and we want to start the monophone training + # on relatively short utterances (easier to align), but not only the very shortest + # ones (mostly uh-huh). So take the 100k shortest ones, and then take 10k random + # utterances from those. 
+ + utils/subset_data_dir.sh --shortest data/train 90000 data/train_100kshort + utils/subset_data_dir.sh data/train_100kshort 10000 data/train_10k + utils/data/remove_dup_utts.sh 100 data/train_10k data/train_10k_nodup + utils/subset_data_dir.sh --speakers data/train 30000 data/train_30k + utils/subset_data_dir.sh --speakers data/train 90000 data/train_100k +fi + +if [ $stage -le 3 ]; then + steps/train_mono.sh --nj 10 --cmd "$train_cmd" \ + data/train_10k_nodup data/lang exp/mono0a + + steps/align_si.sh --nj 30 --cmd "$train_cmd" \ + data/train_30k data/lang exp/mono0a exp/mono0a_ali || exit 1; + + steps/train_deltas.sh --cmd "$train_cmd" \ + 2500 20000 data/train_30k data/lang exp/mono0a_ali exp/tri1 || exit 1; + + + (utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph + steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri1/graph data/dev exp/tri1/decode_dev)& + + steps/align_si.sh --nj 30 --cmd "$train_cmd" \ + data/train_30k data/lang exp/tri1 exp/tri1_ali || exit 1; + + steps/train_deltas.sh --cmd "$train_cmd" \ + 2500 20000 data/train_30k data/lang exp/tri1_ali exp/tri2 || exit 1; + + ( + utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph || exit 1; + steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri2/graph data/dev exp/tri2/decode_dev || exit 1; + )& +fi + +if [ $stage -le 4 ]; then + steps/align_si.sh --nj 30 --cmd "$train_cmd" \ + data/train_100k data/lang exp/tri2 exp/tri2_ali || exit 1; + +# Train tri3a, which is LDA+MLLT, on 100k data. + steps/train_lda_mllt.sh --cmd "$train_cmd" \ + --splice-opts "--left-context=3 --right-context=3" \ + 3000 40000 data/train_100k data/lang exp/tri2_ali exp/tri3a || exit 1; + ( + utils/mkgraph.sh data/lang_test exp/tri3a exp/tri3a/graph || exit 1; + steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri3a/graph data/dev exp/tri3a/decode_dev || exit 1; + )& +fi + +if [ $stage -le 5 ]; then +# Next we'll use fMLLR and train with SAT (i.e. 
on +# fMLLR features) + steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ + data/train_100k data/lang exp/tri3a exp/tri3a_ali || exit 1; + + steps/train_sat.sh --cmd "$train_cmd" \ + 4000 60000 data/train_100k data/lang exp/tri3a_ali exp/tri4a || exit 1; + + ( + utils/mkgraph.sh data/lang_test exp/tri4a exp/tri4a/graph + steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri4a/graph data/dev exp/tri4a/decode_dev +)& + + + steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ + data/train data/lang exp/tri4a exp/tri4a_ali || exit 1; + +# Reduce the number of gaussians + steps/train_sat.sh --cmd "$train_cmd" \ + 5000 120000 data/train data/lang exp/tri4a_ali exp/tri5a || exit 1; + + ( + utils/mkgraph.sh data/lang_test exp/tri5a exp/tri5a/graph + steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri5a/graph data/dev exp/tri5a/decode_dev + steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri5a/graph data/test exp/tri5a/decode_test + + # Decode CALLHOME + steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri5a/graph data/callhome_test exp/tri5a/decode_callhome_test + steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri5a/graph data/callhome_dev exp/tri5a/decode_callhome_dev + steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri5a/graph data/callhome_train exp/tri5a/decode_callhome_train + ) & + + + steps/align_fmllr.sh \ + --boost-silence 0.5 --nj 32 --cmd "$train_cmd" \ + data/train data/lang exp/tri5a exp/tri5a_ali +fi + +if $train_sgmm2; then + +steps/train_ubm.sh \ + --cmd "$train_cmd" 750 \ + data/train data/lang exp/tri5a_ali exp/ubm5 + +steps/train_sgmm2.sh \ + --cmd "$train_cmd" 5000 18000 \ + data/train data/lang exp/tri5a_ali exp/ubm5/final.ubm exp/sgmm5 + +utils/mkgraph.sh data/lang_test exp/sgmm5 exp/sgmm5/graph + +( + steps/decode_sgmm2.sh --nj 13 --cmd "$decode_cmd" --num-threads 5 \ + --config conf/decode.config --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_dev \ + exp/sgmm5/graph data/dev exp/sgmm5/decode_dev +)& + +steps/align_sgmm2.sh \ + --nj 32 --cmd "$train_cmd" --transform-dir exp/tri5a_ali \ + --use-graphs true --use-gselect true \ + data/train data/lang exp/sgmm5 exp/sgmm5_ali + +steps/make_denlats_sgmm2.sh \ + --nj 32 --sub-split 32 --num-threads 4 \ + --beam 10.0 --lattice-beam 6 --cmd "$decode_cmd" --transform-dir exp/tri5a_ali \ + data/train data/lang exp/sgmm5_ali exp/sgmm5_denlats + +steps/train_mmi_sgmm2.sh \ + --cmd "$train_cmd" --drop-frames true --transform-dir exp/tri5a_ali --boost 0.1 \ + data/train data/lang exp/sgmm5_ali exp/sgmm5_denlats \ + exp/sgmm5_mmi_b0.1 + +( +utils/mkgraph.sh data/lang_test exp/tri5a exp/tri5a/graph +steps/decode_fmllr_extra.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " -pe smp 4" \ + --config conf/decode.config --scoring-opts "--min-lmwt 8 --max-lmwt 12"\ + exp/tri5a/graph data/dev exp/tri5a/decode_dev +utils/mkgraph.sh data/lang_test exp/sgmm5 exp/sgmm5/graph +steps/decode_sgmm2.sh --nj 13 --cmd "$decode_cmd" --num-threads 5 \ + --config conf/decode.config --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_dev \ + exp/sgmm5/graph data/dev exp/sgmm5/decode_dev +for iter in 1 2 3 4; do + decode=exp/sgmm5_mmi_b0.1/decode_dev_it$iter + mkdir -p $decode + steps/decode_sgmm2_rescore.sh \ + --cmd "$decode_cmd" --iter $iter --transform-dir 
exp/tri5a/decode_dev \ + data/lang_test data/dev/ exp/sgmm5/decode_dev $decode +done +) & +fi + +wait; + +if [ $stage -le 6 ]; then + local/chain/run_tdnn_1g.sh || exit 1; +fi +exit 0; diff --git a/egs/fisher_callhome_spanish/s5_gigaword/steps b/egs/fisher_callhome_spanish/s5_gigaword/steps new file mode 120000 index 00000000000..1b186770dd1 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/steps @@ -0,0 +1 @@ +../../wsj/s5/steps/ \ No newline at end of file diff --git a/egs/fisher_callhome_spanish/s5_gigaword/utils b/egs/fisher_callhome_spanish/s5_gigaword/utils new file mode 120000 index 00000000000..a3279dc8679 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/utils @@ -0,0 +1 @@ +../../wsj/s5/utils/ \ No newline at end of file From e8aecbb584d05eb0b4cad22d3d57a59b0a20a8d5 Mon Sep 17 00:00:00 2001 From: saikiranvalluri <41471921+saikiranvalluri@users.noreply.github.com> Date: Tue, 19 Feb 2019 10:47:15 +0530 Subject: [PATCH 020/235] Some bug fixes --- egs/fisher_callhome_spanish/s5_gigaword/run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/run.sh b/egs/fisher_callhome_spanish/s5_gigaword/run.sh index 5f7068072f3..89e8fbd434b 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/run.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/run.sh @@ -118,8 +118,8 @@ if [ $stage -le 1 ]; then cat "$rnnlm_workdir"/pocolm/lm/"$num_words_pocolm"_3.pocolm/words.txt > "$rnnlm_workdir"/rnnlm_wordlist.txt cut -f 1 -d " " data/lang/words.txt >> "$rnnlm_workdir"/rnnlm_wordlist.txt cat "$rnnlm_workdir"/rnnlm_wordlist.txt | sort | uniq > "$rnnlm_workdir"/rnnlm_wordlist.txt.uniq - local/rnnlm.sh --stage $lmstage --dir "$rnnlm_workdir"/rnnlm --pocolm-dir "$rnnlm_workdir"/pocolm/lm/100000_3.pocolm \ - --wordslist "$rnnlm_workdir"/rnnlm_wordlist.txt.uniq --text "$rnnlm_workdir"/text_lm --text-dir "$rnnlm_workdir"/text_lm + local/rnnlm.sh --stage $lmstage --dir "$rnnlm_workdir"/rnnlm --pocolm-dir "$rnnlm_workdir"/pocolm/lm/"$num_words_pocolm"_3.pocolm \ + --wordslist "$rnnlm_workdir"/rnnlm_wordlist.txt.uniq --text-dir "$rnnlm_workdir"/text_lm fi if [ $stage -le 2 ]; then From ece34bd064bfbdcae7b655552057469c5d47b0b2 Mon Sep 17 00:00:00 2001 From: saikiranvalluri <41471921+saikiranvalluri@users.noreply.github.com> Date: Tue, 19 Feb 2019 10:48:44 +0530 Subject: [PATCH 021/235] Update rnnlm.sh --- egs/fisher_callhome_spanish/s5_gigaword/local/rnnlm.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/rnnlm.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/rnnlm.sh index aa06fdbb293..3850910f312 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/rnnlm.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/rnnlm.sh @@ -21,7 +21,6 @@ lstm_rpd=256 lstm_nrpd=256 stage=0 train_stage=-30 -text=Spanish_gigawrd/text_lm text_dir=Spanish_gigawrd/text_lm . ./cmd.sh @@ -30,7 +29,7 @@ text_dir=Spanish_gigawrd/text_lm mkdir -p $dir/config set -e -for f in $text/dev.txt; do +for f in $text_dir/dev.txt; do [ ! 
-f $f ] && \ echo "$0: expected file $f to exist;" && exit 1 done From 0c4fe470684751a54e4def8600dde847b8507cd5 Mon Sep 17 00:00:00 2001 From: saikiran valluri Date: Tue, 19 Feb 2019 01:27:47 -0500 Subject: [PATCH 022/235] Combining lexicon words with pocolm wordslist for RNNLM training --- .../s5_gigaword/local/get_rnnlm_wordlist.py | 32 ++++++++++++++ .../s5_gigaword/local/rnnlm.sh | 3 +- .../s5_gigaword/run.sh | 42 ++++--------------- 3 files changed, 42 insertions(+), 35 deletions(-) create mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/get_rnnlm_wordlist.py diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/get_rnnlm_wordlist.py b/egs/fisher_callhome_spanish/s5_gigaword/local/get_rnnlm_wordlist.py new file mode 100755 index 00000000000..d6ddfbecc14 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/get_rnnlm_wordlist.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# 2018 Saikiran Valluri, GoVivace inc. + +import os, sys + +if len(sys.argv) < 4: + print( "Usage: python get_rnnlm_wordlist.py ") + sys.exit() + +lexicon_words = open(sys.argv[1], 'r') +pocolm_words = open(sys.argv[2], 'r') +rnnlm_wordsout = open(sys.argv[3], 'w') + +line_count=0 +lexicon=[] + +for line in lexicon_words: + lexicon.append(line.split()[0]) + rnnlm_wordsout.write(line.split()[0] + " " + str(line_count)+'\n') + line_count = line_count + 1 + +for line in pocolm_words: + if not line.split()[0] in lexicon: + rnnlm_wordsout.write(line.split()[0] + " " + str(line_count)+'\n') + line_count = line_count + 1 + +lexicon_words.close() +pocolm_words.close() +rnnlm_wordsout.close() + diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/rnnlm.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/rnnlm.sh index aa06fdbb293..3850910f312 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/rnnlm.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/rnnlm.sh @@ -21,7 +21,6 @@ lstm_rpd=256 lstm_nrpd=256 stage=0 train_stage=-30 -text=Spanish_gigawrd/text_lm text_dir=Spanish_gigawrd/text_lm . ./cmd.sh @@ -30,7 +29,7 @@ text_dir=Spanish_gigawrd/text_lm mkdir -p $dir/config set -e -for f in $text/dev.txt; do +for f in $text_dir/dev.txt; do [ ! -f $f ] && \ echo "$0: expected file $f to exist;" && exit 1 done diff --git a/egs/fisher_callhome_spanish/s5_gigaword/run.sh b/egs/fisher_callhome_spanish/s5_gigaword/run.sh index 5f7068072f3..80c0debfb12 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/run.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/run.sh @@ -19,9 +19,8 @@ callhome_speech=/export/corpora/LDC/LDC96S35 callhome_transcripts=/export/corpora/LDC/LDC96T17 split_callhome=local/splits/split_callhome -gigaword_datapath=/export/c03/svalluri/Spanish_gigaword/data # Path to the download of Gigaword data -rnnlm_workdir=/export/c03/svalluri/workdir_rnnlm # Work path for entire Gigaword LM and text processing, should be - # large free spae and easy IO access. +gigaword_datapath=/export/c03/svalluri/Spanish_gigaword/data +rnnlm_workdir=/export/c03/svalluri/workdir_rnnlm mfccdir=`pwd`/mfcc . ./cmd.sh @@ -31,8 +30,9 @@ if [ -f path.sh ]; then . ./path.sh; fi set -eou pipefail if [ $stage -le -1 ]; then - local/fsp_data_prep.sh $sfisher_speech $sfisher_transcripts - local/callhome_data_prep.sh $callhome_speech $callhome_transcripts +# local/fsp_data_prep.sh $sfisher_speech $sfisher_transcripts + +# local/callhome_data_prep.sh $callhome_speech $callhome_transcripts # The lexicon is created using the LDC spanish lexicon, the words from the # fisher spanish corpus. 
Additional (most frequent) words are added from the @@ -72,29 +72,6 @@ if [ $stage -le -1 ]; then cp -r data/local/data/callhome_train_all data/callhome_train_all - # Creating data partitions for the pipeline - # We need datasets for both the ASR and SMT system - # We have 257455 utterances left, so the partitions are roughly as follows - # ASR Train : 100k utterances - # ASR Tune : 17455 utterances - # ASR Eval : 20k utterances - # MT Train : 100k utterances - # MT Tune : Same as the ASR eval set (Use the lattices from here) - # MT Eval : 20k utterances - # The dev and the test sets need to be carefully chosen so that there is no conversation/speaker - # overlap. This has been setup and the script local/fsp_ideal_data_partitions provides the numbers that are needed below. - # As noted above, the LM has not been trained on the dev and the test sets. - #utils/subset_data_dir.sh --first data/train_all 158126 data/dev_and_test - #utils/subset_data_dir.sh --first data/dev_and_test 37814 data/asr_dev_and_test - #utils/subset_data_dir.sh --last data/dev_and_test 120312 data/mt_train_and_test - #utils/subset_data_dir.sh --first data/asr_dev_and_test 17662 data/dev - #utils/subset_data_dir.sh --last data/asr_dev_and_test 20152 data/test - #utils/subset_data_dir.sh --first data/mt_train_and_test 100238 data/mt_train - #utils/subset_data_dir.sh --last data/mt_train_and_test 20074 data/mt_test - #rm -r data/dev_and_test - #rm -r data/asr_dev_and_test - #rm -r data/mt_train_and_test - local/create_splits.sh $split local/callhome_create_splits.sh $split_callhome fi @@ -115,11 +92,10 @@ fi if [ $stage -le 1 ]; then num_words_pocolm=110000 local/train_pocolm.sh --stage $lmstage --num-words-pocolm 110000 "$rnnlm_workdir"/text_lm/ "$rnnlm_workdir"/pocolm - cat "$rnnlm_workdir"/pocolm/lm/"$num_words_pocolm"_3.pocolm/words.txt > "$rnnlm_workdir"/rnnlm_wordlist.txt - cut -f 1 -d " " data/lang/words.txt >> "$rnnlm_workdir"/rnnlm_wordlist.txt - cat "$rnnlm_workdir"/rnnlm_wordlist.txt | sort | uniq > "$rnnlm_workdir"/rnnlm_wordlist.txt.uniq - local/rnnlm.sh --stage $lmstage --dir "$rnnlm_workdir"/rnnlm --pocolm-dir "$rnnlm_workdir"/pocolm/lm/100000_3.pocolm \ - --wordslist "$rnnlm_workdir"/rnnlm_wordlist.txt.uniq --text "$rnnlm_workdir"/text_lm --text-dir "$rnnlm_workdir"/text_lm + local/get_rnnlm_wordlist.py data/lang/words.txt "$rnnlm_workdir"/pocolm/lm/"$num_words_pocolm"_3.pocolm/words.txt \ + "$rnnlm_workdir"/rnnlm_wordlist + local/rnnlm.sh --stage $lmstage --dir "$rnnlm_workdir"/rnnlm --pocolm-dir "$rnnlm_workdir"/pocolm/lm/"$num_words_pocolm"_3.pocolm \ + --wordslist "$rnnlm_workdir"/rnnlm_wordlist --text-dir "$rnnlm_workdir"/text_lm fi if [ $stage -le 2 ]; then From abfbc567dcc95a100d1d49ff945d081d47170a0e Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Wed, 20 Feb 2019 13:08:15 -0500 Subject: [PATCH 023/235] [src] Thread-safety for GrammarFst (thx:armando.muscariello@gmail.com) (#3040) --- src/decoder/grammar-fst.cc | 26 ++++++--------- src/decoder/grammar-fst.h | 31 ++++++++--------- src/decoder/lattice-faster-decoder.h | 12 +++---- src/doc/kaldi_for_dummies.dox | 50 ++++++++++++++-------------- src/doc/tutorial_setup.dox | 6 ++-- src/fstbin/make-grammar-fst.cc | 16 ++++----- 6 files changed, 67 insertions(+), 74 deletions(-) diff --git a/src/decoder/grammar-fst.cc b/src/decoder/grammar-fst.cc index 27d8c9998ea..ab1a8142c1d 100644 --- a/src/decoder/grammar-fst.cc +++ b/src/decoder/grammar-fst.cc @@ -25,10 +25,10 @@ namespace fst { GrammarFst::GrammarFst( int32 nonterm_phones_offset, - const ConstFst 
&top_fst, - const std::vector *> > &ifsts): + std::shared_ptr > top_fst, + const std::vector > > > &ifsts): nonterm_phones_offset_(nonterm_phones_offset), - top_fst_(&top_fst), + top_fst_(top_fst), ifsts_(ifsts) { Init(); } @@ -69,11 +69,6 @@ void GrammarFst::Destroy() { nonterminal_map_.clear(); entry_arcs_.clear(); instances_.clear(); - // the following will only do something if we read this object from disk using - // its Read() function. - for (size_t i = 0; i < fsts_to_delete_.size(); i++) - delete fsts_to_delete_[i]; - fsts_to_delete_.clear(); } @@ -127,7 +122,7 @@ void GrammarFst::InitInstances() { KALDI_ASSERT(instances_.empty()); instances_.resize(1); instances_[0].ifst_index = -1; - instances_[0].fst = top_fst_; + instances_[0].fst = top_fst_.get(); instances_[0].parent_instance = -1; instances_[0].parent_state = -1; } @@ -314,7 +309,7 @@ int32 GrammarFst::GetChildInstanceId(int32 instance_id, int32 nonterminal, } int32 ifst_index = iter->second; child_instance.ifst_index = ifst_index; - child_instance.fst = ifsts_[ifst_index].second; + child_instance.fst = ifsts_[ifst_index].second.get(); child_instance.parent_instance = instance_id; child_instance.parent_state = state; InitEntryOrReentryArcs(*(parent_instance.fst), state, @@ -429,15 +424,14 @@ void GrammarFst::Read(std::istream &is, bool binary) { "update your code."; ReadBasicType(is, binary, &num_ifsts); ReadBasicType(is, binary, &nonterm_phones_offset_); - top_fst_ = ReadConstFstFromStream(is); - fsts_to_delete_.push_back(top_fst_); + top_fst_ = std::shared_ptr >(ReadConstFstFromStream(is)); for (int32 i = 0; i < num_ifsts; i++) { int32 nonterminal; ReadBasicType(is, binary, &nonterminal); - ConstFst *this_fst = ReadConstFstFromStream(is); - fsts_to_delete_.push_back(this_fst); - ifsts_.push_back(std::pair* >(nonterminal, - this_fst)); + std::shared_ptr > + this_fst(ReadConstFstFromStream(is)); + ifsts_.push_back(std::pair > >( + nonterminal, this_fst)); } Init(); } diff --git a/src/decoder/grammar-fst.h b/src/decoder/grammar-fst.h index b82d7b3bc9f..cfbfcad4ec6 100644 --- a/src/decoder/grammar-fst.h +++ b/src/decoder/grammar-fst.h @@ -88,9 +88,11 @@ template<> class ArcIterator; points whenever we invoke a nonterminal. For more information see \ref grammar (i.e. ../doc/grammar.dox). - Caution: this class is not thread safe, i.e. you shouldn't access the same - GrammarFst from multiple threads. We can fix this later if needed. - */ + THREAD SAFETY: you can't use this object from multiple threads; you should + create lightweight copies of this object using the copy constructor, + e.g. `new GrammarFst(this_grammar_fst)`, if you want to decode from multiple + threads using the same GrammarFst. +*/ class GrammarFst { public: typedef GrammarFstArc Arc; @@ -136,16 +138,20 @@ class GrammarFst { phones.txt, i.e. the things with names like "#nonterm:foo" and "#nonterm:bar" in phones.txt. Also no nonterminal may appear more than once in 'fsts'. ifsts may be empty, even though that doesn't - make much sense. This function does not take ownership of - these pointers (i.e. it will not delete them when it is destroyed). + make much sense. */ GrammarFst( int32 nonterm_phones_offset, - const ConstFst &top_fst, - const std::vector *> > &ifsts); + std::shared_ptr > top_fst, + const std::vector > > > &ifsts); + + /// Copy constructor. Useful because this object is not thread safe so cannot + /// be used by multiple parallel decoder threads, but it is lightweight and + /// can copy it without causing the stored FSTs to be copied. 
+ GrammarFst(const GrammarFst &other) = default; /// This constructor should only be used prior to calling Read(). - GrammarFst(): top_fst_(NULL) { } + GrammarFst() { } // This Write function allows you to dump a GrammarFst to disk as a single // object. It only supports binary mode, but the option is allowed for @@ -449,12 +455,12 @@ class GrammarFst { // The top-level FST passed in by the user; contains the start state and // final-states, and may invoke FSTs in 'ifsts_' (which can also invoke // each other recursively). - const ConstFst *top_fst_; + std::shared_ptr > top_fst_; // A list of pairs (nonterm, fst), where 'nonterm' is a user-defined // nonterminal symbol as numbered in phones.txt (e.g. #nonterm:foo), and // 'fst' is the corresponding FST. - std::vector *> > ifsts_; + std::vector > > > ifsts_; // Maps from the user-defined nonterminals like #nonterm:foo as numbered // in phones.txt, to the corresponding index into 'ifsts_', i.e. the ifst_index. @@ -474,11 +480,6 @@ class GrammarFst { // representing top_fst_, and it will be populated with more elements on // demand. An instance_id refers to an index into this vector. std::vector instances_; - - // A list of FSTs that are to be deleted when this object is destroyed. This - // will only be nonempty if we have read this object from the disk using - // Read(). - std::vector *> fsts_to_delete_; }; diff --git a/src/decoder/lattice-faster-decoder.h b/src/decoder/lattice-faster-decoder.h index c611ec9dc05..5f8c0778723 100644 --- a/src/decoder/lattice-faster-decoder.h +++ b/src/decoder/lattice-faster-decoder.h @@ -131,12 +131,12 @@ struct StdToken { // to keep it in a good numerical range). BaseFloat tot_cost; - // exta_cost is >= 0. After calling PruneForwardLinks, this equals - // the minimum difference between the cost of the best path, and the cost of - // this is on, and the cost of the absolute best path, under the assumption - // that any of the currently active states at the decoding front may - // eventually succeed (e.g. if you were to take the currently active states - // one by one and compute this difference, and then take the minimum). + // exta_cost is >= 0. After calling PruneForwardLinks, this equals the + // minimum difference between the cost of the best path that this link is a + // part of, and the cost of the absolute best path, under the assumption that + // any of the currently active states at the decoding front may eventually + // succeed (e.g. if you were to take the currently active states one by one + // and compute this difference, and then take the minimum). BaseFloat extra_cost; // 'links' is the head of singly-linked list of ForwardLinks, which is what we diff --git a/src/doc/kaldi_for_dummies.dox b/src/doc/kaldi_for_dummies.dox index c04e0d0c3e9..d712ab87af9 100644 --- a/src/doc/kaldi_for_dummies.dox +++ b/src/doc/kaldi_for_dummies.dox @@ -87,14 +87,14 @@ If you do not have much idea about how to use GIT, please read about it: \ref tutorial_git. I installed Kaldi in this directory (called 'Kaldi root path'): -\c /home/{user}/kaldi-trunk +\c /home/{user}/kaldi \section kaldi_for_dummies_directories Kaldi directories structure Try to acknowledge where particular Kaldi components are placed. Also it would be nice if you read any \c README files you find. 
-\c kaldi-trunk - main Kaldi directory which contains: +\c kaldi - main Kaldi directory which contains: - \c egs – example scripts allowing you to quickly build ASR systems for over 30 popular speech corporas (documentation is attached for each project), @@ -127,7 +127,7 @@ train it, test it and get some decoding results.
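If you would rather explore this layout from the shell than from the listing above, a quick look around is enough. This is only a sketch; the path assumes the install location \c /home/{user}/kaldi mentioned earlier, and it changes nothing:

\verbatim
cd ~/kaldi
ls egs                                   # one folder per example recipe
find egs -maxdepth 2 -iname 'readme*'    # the README files worth skimming
\endverbatim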

Your first task

Something to begin with - create a folder \c digits in -\c kaldi-trunk/egs/ directory. This is a place where you will put all +\c kaldi/egs/ directory. This is a place where you will put all the stuff related to your project. \section kaldi_for_dummies_data Data preparation @@ -156,11 +156,11 @@ careful with big data sets and complex grammars - start with something simple. Sentences that contain only digits are perfect in this case.
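Since the recordings in this tutorial are short digit sequences (the later \c wav.scp example uses three-digit utterances such as \c 4_4_2), it can be handy to generate reading prompts instead of inventing them on the spot. A small sketch, assuming you record one WAV file per prompt; adjust the length and count to your own corpus:

\verbatim
# print ten random three-digit prompts to read out loud while recording
for i in $(seq 1 10); do
  echo "$((RANDOM % 10)) $((RANDOM % 10)) $((RANDOM % 10))"
done
\endverbatim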

Task

-Go to \c kaldi-trunk/egs/digits directory and create
-\c digits_audio folder. In \c kaldi-trunk/egs/digits/digits_audio
+Go to \c kaldi/egs/digits directory and create
+\c digits_audio folder. In \c kaldi/egs/digits/digits_audio
 create two folders: \c train and \c test. Select one speaker of your choice
 to represent testing data set. Use this speaker's 'speakerID' as
-a name for an another new folder in \c kaldi-trunk/egs/digits/digits_audio/test
+a name for another new folder in \c kaldi/egs/digits/digits_audio/test
 directory. Then put there all the audio files related to that person.
 Put the rest (9 speakers) into \c train folder - this will be your training
 data set. Also create subfolders for each speaker.
@@ -178,7 +178,7 @@ And for you information - \c utils directory will be attached to your project
 in \ref kaldi_for_dummies_tools "Tools attachment" section.
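If you prefer to script this bookkeeping, here is a minimal sketch of the layout described above. The speaker names are only illustrative: \c john stands for whichever speaker you hold out for testing, and \c dad and \c july are two of the nine training speakers:

\verbatim
cd ~/kaldi/egs/digits
mkdir -p digits_audio/test/john        # the one held-out test speaker
for spk in dad july; do                # ...and so on for all nine training speakers
  mkdir -p digits_audio/train/$spk
done
\endverbatim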

Task

-In \c kaldi-trunk/egs/digits directory, create a folder \c data. Then create +In \c kaldi/egs/digits directory, create a folder \c data. Then create \c test and \c train subfolders inside. Create in each subfolder following files (so you have files named in the same way in \c test and \c train subfolders but they relate to two different data sets that you created before): @@ -207,9 +207,9 @@ for examples below). Pattern: \verbatim -dad_4_4_2 /home/{user}/kaldi-trunk/egs/digits/digits_audio/train/dad/4_4_2.wav -july_1_2_5 /home/{user}/kaldi-trunk/egs/digits/digits_audio/train/july/1_2_5.wav -july_6_8_3 /home/{user}/kaldi-trunk/egs/digits/digits_audio/train/july/6_8_3.wav +dad_4_4_2 /home/{user}/kaldi/egs/digits/digits_audio/train/dad/4_4_2.wav +july_1_2_5 /home/{user}/kaldi/egs/digits/digits_audio/train/july/1_2_5.wav +july_6_8_3 /home/{user}/kaldi/egs/digits/digits_audio/train/july/6_8_3.wav # and so on... \endverbatim @@ -236,8 +236,8 @@ july_6_8_3 july \endverbatim e.) \c corpus.txt
-This file has a slightly different directory. In \c kaldi-trunk/egs/digits/data
-create another folder \c local. In \c kaldi-trunk/egs/digits/data/local create a
+This file goes in a slightly different directory. In \c kaldi/egs/digits/data
+create another folder \c local. In \c kaldi/egs/digits/data/local create a
 file \c corpus.txt which should contain every single utterance transcription
 that can occur in your ASR system (in our case it will be 100 lines from
 100 audio files).
@@ -258,8 +258,8 @@ is precisely described). Also feel free to read some examples in other \c egs
 scripts. Now is the perfect time.
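Once the acoustic data files above are in place, an optional sanity check catches sorting and format mistakes early. The sketch below assumes the \c utils directory from the \ref kaldi_for_dummies_tools "Tools attachment" section is already linked into your project; \c spk2utt (which later stages expect next to \c utt2spk) is simply derived from \c utt2spk, and \c --no-feats tells the validator not to look for feature files yet:

\verbatim
cd ~/kaldi/egs/digits
utils/utt2spk_to_spk2utt.pl < data/train/utt2spk > data/train/spk2utt
utils/utt2spk_to_spk2utt.pl < data/test/utt2spk > data/test/spk2utt
utils/validate_data_dir.sh --no-feats data/train
utils/validate_data_dir.sh --no-feats data/test
\endverbatim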

Task

-In \c kaldi-trunk/egs/digits/data/local directory, create a folder \c dict. In -\c kaldi-trunk/egs/digits/data/local/dict create following files: +In \c kaldi/egs/digits/data/local directory, create a folder \c dict. In +\c kaldi/egs/digits/data/local/dict create following files: a.) \c lexicon.txt
This file contains every word from your dictionary with its 'phone @@ -337,19 +337,19 @@ complete. You need to add necessary Kaldi tools that are widely used in exemplary scripts.
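A common way to attach these tools (equivalent to the copy described in the task
below) is to symlink the shared directories from the \c wsj example:

\verbatim
cd kaldi/egs/digits
ln -s ../wsj/s5/utils utils
ln -s ../wsj/s5/steps steps
\endverbatim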

Task

-From \c kaldi-trunk/egs/wsj/s5 copy two folders (with the whole content) - +From \c kaldi/egs/wsj/s5 copy two folders (with the whole content) - \c utils and \c steps - and put them in your -\c kaldi-trunk/egs/digits directory. You can also create links to these +\c kaldi/egs/digits directory. You can also create links to these directories. You may find such links in, for example, -\c kaldi-trunk/egs/voxforge/s5. +\c kaldi/egs/voxforge/s5. \subsection kaldi_for_dummies_scoring Scoring script This script will help you to get decoding results.
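The standard decoding scripts invoke \c local/score.sh automatically; once decoding
has finished you can summarise the word error rates with a one-liner such as the
one below (it relies on the \c utils directory attached above):

\verbatim
for d in exp/*/decode*; do grep WER $d/wer_* | utils/best_wer.sh; done
\endverbatim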

Task

-From \c kaldi-trunk/egs/voxforge/s5/local copy the script \c score.sh into -similar location in your project (\c kaldi-trunk/egs/digits/local). +From \c kaldi/egs/voxforge/s5/local copy the script \c score.sh into +similar location in your project (\c kaldi/egs/digits/local). \subsection kaldi_for_dummies_srilm SRILM installation @@ -358,7 +358,7 @@ example - SRI Language Modeling Toolkit (SRILM).
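As a rough sketch of what this involves (the exact location of the installer
differs between Kaldi versions -- in some trees it is \c tools/extras/install_srilm.sh):

\verbatim
cd kaldi/tools
./install_srilm.sh     # read the comments inside first; the SRILM sources are required
\endverbatim

The installer records the SRILM paths in \c tools/env.sh, which the \c path.sh
created later in this tutorial sources with '. $KALDI_ROOT/tools/env.sh'.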

Task

For detailed installation instructions go to -\c kaldi-trunk/tools/install_srilm.sh (read all comments inside). +\c kaldi/tools/install_srilm.sh (read all comments inside). \subsection kaldi_for_dummies_configuration Configuration files @@ -366,8 +366,8 @@ It is not necessary to create configuration files but it can be a good habit for future.
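For orientation, the two files you will create in the task below are tiny. A typical
\c mfcc.conf taken from \c /egs/voxforge contains little more than one option, which
tells the feature extractor to use C0 rather than energy:

\verbatim
--use-energy=false
\endverbatim

\c decode.config similarly just sets a few decoder beams; copy both files from
\c /egs/voxforge rather than writing them from scratch.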

Task

-In \c kaldi-trunk/egs/digits create a folder \c conf. Inside -\c kaldi-trunk/egs/digits/conf create two files (for some configuration +In \c kaldi/egs/digits create a folder \c conf. Inside +\c kaldi/egs/digits/conf create two files (for some configuration modifications in decoding and mfcc feature extraction processes - taken from \c /egs/voxforge): @@ -398,7 +398,7 @@ These two methods are enough to show noticable differences in decoding results using only digits lexicon and small training data set.
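As a rough sketch of what the \c run.sh assembled in the task below boils down to
for these two stages (here \c $nj is a job count you choose, \c $train_cmd and
\c $decode_cmd come from \c cmd.sh, and \c data/lang must already contain your
grammar G.fst before \c mkgraph.sh is called):

\verbatim
# monophone pass
steps/train_mono.sh --nj $nj --cmd "$train_cmd" data/train data/lang exp/mono
utils/mkgraph.sh data/lang exp/mono exp/mono/graph
steps/decode.sh --config conf/decode.config --nj $nj --cmd "$decode_cmd" \
  exp/mono/graph data/test exp/mono/decode

# align with the monophone model, then train a delta-based triphone model
steps/align_si.sh --nj $nj --cmd "$train_cmd" data/train data/lang exp/mono exp/mono_ali
steps/train_deltas.sh --cmd "$train_cmd" 2000 11000 data/train data/lang exp/mono_ali exp/tri1
\endverbatim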

Task

-In \c kaldi-trunk/egs/digits directory create 3 scripts: +In \c kaldi/egs/digits directory create 3 scripts: a.) \c cmd.sh
\code{.sh} @@ -416,7 +416,7 @@ export KALDI_ROOT=`pwd`/../.. export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$PWD:$PATH # Defining audio data directory (modify it for your installation directory!) -export DATA_ROOT="/home/{user}/kaldi-trunk/egs/digits/digits_audio" +export DATA_ROOT="/home/{user}/kaldi/egs/digits/digits_audio" # Enable SRILM . $KALDI_ROOT/tools/env.sh @@ -564,7 +564,7 @@ Now all you have to do is to run \c run.sh script. If I have made any mistakes in this tutorial, logs from the terminal should guide you how to deal with it. Besides the fact that you will notice some decoding results in the terminal -window, go to newly made \c kaldi-trunk/egs/digits/exp. You may notice there +window, go to newly made \c kaldi/egs/digits/exp. You may notice there folders with \c mono and \c tri1 results as well - directories structure are the same. Go to \c mono/decode directory. Here you may find result files (named in a wer_{number} way). Logs for decoding process may be found in \c log diff --git a/src/doc/tutorial_setup.dox b/src/doc/tutorial_setup.dox index 11d97a945f9..13f5e3e9c74 100644 --- a/src/doc/tutorial_setup.dox +++ b/src/doc/tutorial_setup.dox @@ -34,16 +34,16 @@ Assuming Git is installed, to get the latest code you can type \verbatim - git clone https://github.com/kaldi-asr/kaldi.git kaldi-trunk --origin golden + git clone https://github.com/kaldi-asr/kaldi.git \endverbatim - Then cd to kaldi-trunk. Look at the INSTALL file and follow the instructions + Then cd to kaldi. Look at the INSTALL file and follow the instructions (it points you to two subdirectories). Look carefully at the output of the installation scripts, as they try to guide you what to do. Some installation errors are non-fatal, and the installation scripts will tell you so (i.e. there are some things it installs which are nice to have but are not really needed). The "best-case" scenario is that you do: \verbatim - cd kaldi-trunk/tools/; make; cd ../src; ./configure; make + cd kaldi/tools/; make; cd ../src; ./configure; make \endverbatim and everything will just work; however, if this does not happen there are fallback plans (e.g. 
you may have to install some package on your machine, or run diff --git a/src/fstbin/make-grammar-fst.cc b/src/fstbin/make-grammar-fst.cc index f7fd46a4a55..fc9a17908f9 100644 --- a/src/fstbin/make-grammar-fst.cc +++ b/src/fstbin/make-grammar-fst.cc @@ -114,8 +114,9 @@ int main(int argc, char *argv[]) { std::string top_fst_str = po.GetArg(1), fst_out_str = po.GetArg(po.NumArgs()); - ConstFst *top_fst = ReadAsConstFst(top_fst_str); - std::vector* > > pairs; + std::shared_ptr > top_fst( + ReadAsConstFst(top_fst_str)); + std::vector > > > pairs; int32 num_pairs = (po.NumArgs() - 2) / 2; for (int32 i = 1; i <= num_pairs; i++) { @@ -126,12 +127,13 @@ int main(int argc, char *argv[]) { KALDI_ERR << "Expected positive integer as nonterminal, got: " << nonterm_str; std::string fst_str = po.GetArg(2*i + 1); - ConstFst *fst = ReadAsConstFst(fst_str); - pairs.push_back(std::pair* >(nonterminal, fst)); + std::shared_ptr > this_fst(ReadAsConstFst(fst_str)); + pairs.push_back(std::pair > >( + nonterminal, this_fst)); } GrammarFst *grammar_fst = new GrammarFst(nonterm_phones_offset, - *top_fst, + top_fst, pairs); if (write_as_grammar) { @@ -151,10 +153,6 @@ int main(int argc, char *argv[]) { cfst.Write(ko.Stream(), wopts); } - delete top_fst; - for (size_t i = 0; i < pairs.size(); i++) - delete pairs[i].second; - KALDI_LOG << "Created grammar FST and wrote it to " << fst_out_str; } catch(const std::exception &e) { From f09d48a4d37a0172f882dce2ced0790b9eaa4374 Mon Sep 17 00:00:00 2001 From: Teddyang Date: Fri, 22 Feb 2019 02:18:44 +0800 Subject: [PATCH 024/235] [scripts] Cosmetic fix to get_degs.sh (#3045) --- egs/wsj/s5/steps/nnet3/get_degs.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/get_degs.sh b/egs/wsj/s5/steps/nnet3/get_degs.sh index 8098b59c4ad..7853daa4563 100755 --- a/egs/wsj/s5/steps/nnet3/get_degs.sh +++ b/egs/wsj/s5/steps/nnet3/get_degs.sh @@ -471,7 +471,6 @@ if [ $stage -le 10 ] && $cleanup; then fi -exit 0 - - echo "$0: Finished decoding and preparing training examples" + +exit 0 From b0fc09d1a8f6064672017780401ca6656406308a Mon Sep 17 00:00:00 2001 From: ChunChiehChang <28868330+ChunChiehChang@users.noreply.github.com> Date: Thu, 21 Feb 2019 17:33:43 -0500 Subject: [PATCH 025/235] [egs] Small bug fixes for IAM and UW3 recipes (#3048) --- egs/iam/v1/local/train_lm.sh | 2 +- egs/uw3/v1/local/process_data.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/egs/iam/v1/local/train_lm.sh b/egs/iam/v1/local/train_lm.sh index 911f54c5439..3e8c838efdb 100755 --- a/egs/iam/v1/local/train_lm.sh +++ b/egs/iam/v1/local/train_lm.sh @@ -60,7 +60,7 @@ if [ $stage -le 0 ]; then # Using LOB and brown corpus. if [ ! 
-f data/local/lob-train-only.txt ]; then cat data/local/lobcorpus/0167/download/LOB_COCOA/lob.txt | \ - local/remove_test_utterances_from_lob.py data/test/text.old data/val/text.old \ + local/remove_test_utterances_from_lob.py data/test/text data/val/text \ > data/local/lob-train-only.txt fi cat data/local/lob-train-only.txt > ${dir}/data/text/lob.txt diff --git a/egs/uw3/v1/local/process_data.py b/egs/uw3/v1/local/process_data.py index 3643c0aca89..23b8e5402cf 100755 --- a/egs/uw3/v1/local/process_data.py +++ b/egs/uw3/v1/local/process_data.py @@ -52,10 +52,10 @@ # The dataset is randomly split train 95% and test 5% coin = random.randint(0, 20) if coin >= 1: - train_text_fh.write(utt_id + ' ' + text + '\n') + train_text_fh.write("{} {}\n".format(utt_id, text)) train_utt2spk_fh.write("{} {}\n".format(utt_id, page_count)) - train_image_fh.write("{} {}\n".format(utt_id, image_path) + train_image_fh.write("{} {}\n".format(utt_id, image_path)) elif coin < 1: test_text_fh.write("{} {}\n".format(utt_id, text)) test_utt2spk_fh.write("{} {}\n".format(utt_id, page_count)) - train_image_fh.write("{} {}\n".format(utt_id, image_path) + train_image_fh.write("{} {}\n".format(utt_id, image_path)) From 1439b0dd9d0d2ae527e0ddd14c6a4b39c7bd7075 Mon Sep 17 00:00:00 2001 From: saikiranvalluri Date: Sun, 24 Feb 2019 01:54:54 -0500 Subject: [PATCH 026/235] Integrated the 2 stage scientific method POCOLM training for Gigaword corpus --- .../s5_gigaword/cmd.sh | 2 +- .../local/get_unigram_weights_vocab.py | 33 +++++++++++++++++++ .../s5_gigaword/local/pocolm_cust.sh | 7 ++-- .../s5_gigaword/local/train_pocolm.sh | 26 +++++++++++---- .../s5_gigaword/run.sh | 5 ++- 5 files changed, 62 insertions(+), 11 deletions(-) create mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/get_unigram_weights_vocab.py diff --git a/egs/fisher_callhome_spanish/s5_gigaword/cmd.sh b/egs/fisher_callhome_spanish/s5_gigaword/cmd.sh index 0511bd2bbb0..db97f1fbc6f 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/cmd.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/cmd.sh @@ -10,6 +10,6 @@ # conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, # or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. -export train_cmd="retry.pl queue.pl" +export train_cmd="retry.pl queue.pl --mem 8G" export decode_cmd="retry.pl queue.pl --mem 8G" export mkgraph_cmd="queue.pl --mem 8G" diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/get_unigram_weights_vocab.py b/egs/fisher_callhome_spanish/s5_gigaword/local/get_unigram_weights_vocab.py new file mode 100644 index 00000000000..43cf8392167 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/get_unigram_weights_vocab.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# 2018 Saikiran Valluri, GoVivace inc. + +import os, sys + +if len(sys.argv) < 3: + print("Usage : python . 
") + print(" Used for generating the unigram weights for second pass vocabulary from the first pass pocolm training metaparameters.") + sys.exit() + +pocolmdir=sys.argv[1] +unigramwts=open(sys.argv[2], 'w') + +names = open(pocolmdir+"/names", 'r') +metaparams = open(pocolmdir+"/metaparameters", 'r') + +name_mapper={} +for line in names: + fields=line.split() + name_mapper[fields[0]] = fields[1] + +lns = metaparams.readlines() +for lineno in range(len(name_mapper.keys())): + line = lns[lineno] + fileid = line.split()[0].split("_")[-1] + weight = line.split()[1] + unigramwts.write(name_mapper[fileid] + " " + weight + "\n") + +names.close() +unigramwts.close() +metaparams.close() diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/pocolm_cust.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/pocolm_cust.sh index a3b2d77d860..c6642f6fcf4 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/pocolm_cust.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/pocolm_cust.sh @@ -13,6 +13,8 @@ export PATH=$PATH:$POCOLM_ROOT/scripts wordlist=None num_word=100000 +pocolm_stage=2 +ngram_order=3 lm_dir= arpa_dir= textdir= @@ -55,7 +57,7 @@ limit_unk_history_opt= # un-comment the following line #limit_unk_history_opt="--limit-unk-history=true" -for order in 3; do +for order in ${ngram_order}; do # decide on the vocabulary. # Note: you'd use --wordlist if you had a previously determined word-list # that you wanted to use. @@ -72,6 +74,7 @@ for order in 3; do --keep-int-data=true ${fold_dev_opt} ${bypass_metaparam_optim_opt} \ ${limit_unk_history_opt} ${textdir} ${order} ${lm_dir}/work ${unpruned_lm_dir} + if [ $pocolm_stage -eq 2 ];then mkdir -p ${arpa_dir} format_arpa_lm.py ${max_memory} ${unpruned_lm_dir} | gzip -c > ${arpa_dir}/${lm_name}_${order}gram_unpruned.arpa.gz @@ -93,7 +96,7 @@ for order in 3; do get_data_prob.py ${textdir}/dev.txt ${max_memory} ${pruned_lm_dir} 2>&1 | grep -F '[perplexity' format_arpa_lm.py ${max_memory} ${pruned_lm_dir} | gzip -c > ${arpa_dir}/${lm_name}_${order}gram_prune${size}.arpa.gz - + fi done # (run local/srilm_baseline.sh ${num_word} to see the following result e.g. local/srilm_baseline.sh 40000 ) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh index 29fbeebace6..8ceb08f281a 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh @@ -17,22 +17,34 @@ textdir=$1 pocolm_dir=$2 -if [ $stage -le -2 ];then +if [ $stage -le -2 ]; then + echo "\n\n" + echo " POCOLM experiment : Runnning STAGE 1 : 2-gram Pocolm general closed vocabulary model" + echo " Will estimate the metaparams to be used as unigram weights for stage 2 ....." 
+ echo "\n\n" if [ -e "$textdir"/unigram_weights ]; then rm "$textdir"/unigram_weights fi - if [ -e "$pocolm_dir" ]; then rm -r "$pocolm_dir" fi + + bash local/pocolm_cust.sh --num-word 0 --ngram-order 2 --pocolm-stage 1 --lm-dir "$pocolm_dir"/lm \ + --arpa-dir "$pocolm_dir"/arpa --textdir "$textdir" - bash local/pocolm_cust.sh --num-word "$num_words_pocolm" --lm-dir "$pocolm_dir"/lm \ - --arpa-dir "$pocolm_dir"/arpa --textdir "$textdir" fi - + if [ $stage -le -1 ];then - prune_lm_dir.py --target-num-ngrams=${prune_size} --max-memory=8G "$pocolm_dir"/lm/"$num_words_pocolm"_3.pocolm "$pocolm_dir"/lm/"$num_words_pocolm"_3.pocolm_pruned - format_arpa_lm.py --max-memory=8G "$pocolm_dir"/lm/"$num_words_pocolm"_3.pocolm_pruned | gzip -c > "$pocolm_dir"/arpa/"$num_words_pocolm"_3.pocolm_pruned_${prune_size}.arpa.gz + echo "\n\n" + echo "POCOLM experiment : RUNNING STAGE 2 : 3gram POCOLM using unigram wts estimates in 1st stage....." + echo "\n\n" + + echo " " > "$pocolm_dir"/lm/work/.unigram_weights.done + python local/get_unigramwts.py "$pocolm_dir"/lm/0_2.pocolm/ "$textdir"/unigram_weights + bash local/pocolm_cust.sh --num-word "$num_words_pocolm" --lm-dir "$pocolm_dir"/lm \ + --arpa-dir "$pocolm_dir"/arpa --textdir "$textdir" + + fi diff --git a/egs/fisher_callhome_spanish/s5_gigaword/run.sh b/egs/fisher_callhome_spanish/s5_gigaword/run.sh index 80c0debfb12..6e2ee9d4f25 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/run.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/run.sh @@ -20,7 +20,7 @@ callhome_transcripts=/export/corpora/LDC/LDC96T17 split_callhome=local/splits/split_callhome gigaword_datapath=/export/c03/svalluri/Spanish_gigaword/data -rnnlm_workdir=/export/c03/svalluri/workdir_rnnlm +rnnlm_workdir=/export/c03/svalluri/workdir_pocolm_2stage mfccdir=`pwd`/mfcc . ./cmd.sh @@ -94,6 +94,9 @@ if [ $stage -le 1 ]; then local/train_pocolm.sh --stage $lmstage --num-words-pocolm 110000 "$rnnlm_workdir"/text_lm/ "$rnnlm_workdir"/pocolm local/get_rnnlm_wordlist.py data/lang/words.txt "$rnnlm_workdir"/pocolm/lm/"$num_words_pocolm"_3.pocolm/words.txt \ "$rnnlm_workdir"/rnnlm_wordlist +fi + +if [ $stage -le 2 ]; then local/rnnlm.sh --stage $lmstage --dir "$rnnlm_workdir"/rnnlm --pocolm-dir "$rnnlm_workdir"/pocolm/lm/"$num_words_pocolm"_3.pocolm \ --wordslist "$rnnlm_workdir"/rnnlm_wordlist --text-dir "$rnnlm_workdir"/text_lm fi From 8ad0e0130c011fef22f583d1ca60e0c0d6f856a0 Mon Sep 17 00:00:00 2001 From: saikiranvalluri Date: Tue, 26 Feb 2019 05:17:14 +0000 Subject: [PATCH 027/235] Update train_pocolm.sh --- .../s5_gigaword/local/train_pocolm.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh index 8ceb08f281a..c8adb79383e 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh @@ -18,10 +18,10 @@ pocolm_dir=$2 if [ $stage -le -2 ]; then - echo "\n\n" - echo " POCOLM experiment : Runnning STAGE 1 : 2-gram Pocolm general closed vocabulary model" + echo "****" + echo " POCOLM experiment : Running STAGE 1 : 2-gram Pocolm general closed vocabulary model" echo " Will estimate the metaparams to be used as unigram weights for stage 2 ....." 
- echo "\n\n" + echo "****" if [ -e "$textdir"/unigram_weights ]; then rm "$textdir"/unigram_weights fi @@ -35,12 +35,12 @@ if [ $stage -le -2 ]; then fi if [ $stage -le -1 ];then - echo "\n\n" + echo "********" echo "POCOLM experiment : RUNNING STAGE 2 : 3gram POCOLM using unigram wts estimates in 1st stage....." - echo "\n\n" + echo "********" echo " " > "$pocolm_dir"/lm/work/.unigram_weights.done - python local/get_unigramwts.py "$pocolm_dir"/lm/0_2.pocolm/ "$textdir"/unigram_weights + python local/get_unigram_weights_vocab.py "$pocolm_dir"/lm/0_2.pocolm/ "$textdir"/unigram_weights bash local/pocolm_cust.sh --num-word "$num_words_pocolm" --lm-dir "$pocolm_dir"/lm \ --arpa-dir "$pocolm_dir"/arpa --textdir "$textdir" From 4494a85b16f7e6109aae66c99d8da4702d00b467 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Tue, 26 Feb 2019 10:40:45 -0500 Subject: [PATCH 028/235] [scripts] Nnet3 segmentation: fix default params (#3051) --- .../cleanup/segment_long_utterances_nnet3.sh | 43 +++++++++++++------ 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/egs/wsj/s5/steps/cleanup/segment_long_utterances_nnet3.sh b/egs/wsj/s5/steps/cleanup/segment_long_utterances_nnet3.sh index ae355c9f753..751200bdf83 100755 --- a/egs/wsj/s5/steps/cleanup/segment_long_utterances_nnet3.sh +++ b/egs/wsj/s5/steps/cleanup/segment_long_utterances_nnet3.sh @@ -4,7 +4,8 @@ # 2016 Vimal Manohar # Apache 2.0 -# This script is similar to steps/cleanup/segment_long_utterances.sh, but + +# This script is similar to steps/cleanup/segment_long_utterances.sh, but # uses nnet3 acoustic model instead of GMM acoustic model for decoding. # This script performs segmentation of the input data based on the transcription # and outputs segmented data along with the corresponding aligned transcription. @@ -13,7 +14,7 @@ # are of manageable length for further processing, along with the portion of the # transcript that seems to match (aligns with) each segment. # This the light-supervised training scenario where the input transcription is -# not expected to be completely clean and may have significant errors. +# not expected to be completely clean and may have significant errors. # See "JHU Kaldi System for Arabic MGB-3 ASR Challenge using Diarization, # Audio-transcript Alignment and Transfer Learning": Vimal Manohar, Daniel # Povey, Sanjeev Khudanpur, ASRU 2017 @@ -39,24 +40,22 @@ seconds_per_spk_max=30 # Decode options graph_opts= +scale_opts= # for making the graphs beam=15.0 lattice_beam=1.0 lmwt=10 - acwt=0.1 # Just a default value, used for adaptation and beam-pruning.. -post_decode_acwt=1.0 # can be used in 'chain' systems to scale acoustics by 10 so the - # regular scoring script works. # Contexts must ideally match training extra_left_context=0 # Set to some large value, typically 40 for LSTM (must match training) -extra_right_context=0 +extra_right_context=0 extra_left_context_initial=-1 extra_right_context_final=-1 frames_per_chunk=150 # i-vector options -extractor= # i-Vector extractor. If provided, will extract i-vectors. - # Required if the network was trained with i-vector extractor. +extractor= # i-Vector extractor. If provided, will extract i-vectors. + # Required if the network was trained with i-vector extractor. use_vad=false # Use energy-based VAD for i-vector extraction # TF-IDF similarity search options @@ -116,12 +115,12 @@ it and eliminate data where the transcript doesn't seem to match. --segmentation-extra-opts 'opts' # Additional options to segment_ctm_edits_mild.py. 
# Please run steps/cleanup/internal/segment_ctm_edits_mild.py # without arguments to see allowed options. - --align-full-hyp # If true, align full hypothesis - i.e. trackback from the end to get the alignment. - This is different from the normal + --align-full-hyp # If true, align full hypothesis + i.e. trackback from the end to get the alignment. + This is different from the normal Smith-Waterman alignment, where the traceback will be from the maximum score. - --extractor # i-vector extractor directory if i-vector is + --extractor # i-vector extractor directory if i-vector is # to be used during decoding. Must match # the extractor used for training neural-network. --use-vad # If true, uses energy-based VAD to apply frame weights @@ -168,6 +167,23 @@ cp $srcdir/cmvn_opts $dir cp $srcdir/{splice_opts,delta_opts,final.mat,final.alimdl} $dir 2>/dev/null || true cp $srcdir/frame_subsampling_factor $dir 2>/dev/null || true +if [ -f $srcdir/frame_subsampling_factor ]; then + echo "$0: guessing that this is a chain system, checking parameters." + if [ -z $scale_opts ]; then + echo "$0: setting scale_opts" + scale_opts="--self-loop-scale=1.0 --transition-scale=1.0" + fi + if [ $acwt == 0.1 ]; then + echo "$0: setting acwt=1.0" + acwt=1.0 + fi + if [ $lmwt == 10 ]; then + echo "$0: setting lmwt=1.0" + lmwt=1 + fi +fi + + utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt cp $lang/phones.txt $dir @@ -221,6 +237,7 @@ if [ $stage -le 3 ]; then # Make graphs w.r.t. to the original text (usually recording-level) steps/cleanup/make_biased_lm_graphs.sh $graph_opts \ + --scale-opts "$scale_opts" \ --nj $nj --cmd "$cmd" $text \ $lang $dir $dir/graphs if [ -z "$utt2text" ]; then @@ -267,7 +284,7 @@ if [ $stage -le 5 ]; then echo "$0: Decoding with biased language models..." steps/cleanup/decode_segmentation_nnet3.sh \ - --acwt $acwt --post-decode-acwt $post_decode_acwt \ + --acwt $acwt \ --beam $beam --lattice-beam $lattice_beam --nj $nj --cmd "$cmd --mem 4G" \ --skip-scoring true --allow-partial false \ --extra-left-context $extra_left_context \ From bf33f1fb13ee8ddfe4cd0df3d73656e1b491ef01 Mon Sep 17 00:00:00 2001 From: igrinis <42712209+igrinis@users.noreply.github.com> Date: Tue, 26 Feb 2019 20:21:39 +0200 Subject: [PATCH 029/235] [scripts] Allow perturb_data_dir_speed.sh to work with utt2lang (#3055) --- egs/wsj/s5/utils/perturb_data_dir_speed.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/egs/wsj/s5/utils/perturb_data_dir_speed.sh b/egs/wsj/s5/utils/perturb_data_dir_speed.sh index a50cdb04be4..99c9cbdb1f0 100755 --- a/egs/wsj/s5/utils/perturb_data_dir_speed.sh +++ b/egs/wsj/s5/utils/perturb_data_dir_speed.sh @@ -102,6 +102,9 @@ fi if [ -f $srcdir/spk2gender ]; then utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender fi +if [ -f $srcdir/utt2lang ]; then + utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2lang >$destdir/utt2lang +fi #prepare speed-perturbed utt2dur if [ ! 
-f $srcdir/utt2dur ]; then From 5f05d5991bd7b41e564819ede6be7acd12199423 Mon Sep 17 00:00:00 2001 From: Xiaohui Zhang Date: Tue, 26 Feb 2019 19:34:05 -0500 Subject: [PATCH 030/235] [scripts] Make beam in monophone training configurable (#3057) --- egs/wsj/s5/steps/train_mono.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/egs/wsj/s5/steps/train_mono.sh b/egs/wsj/s5/steps/train_mono.sh index 141d128c329..5a0b79a4a1c 100755 --- a/egs/wsj/s5/steps/train_mono.sh +++ b/egs/wsj/s5/steps/train_mono.sh @@ -1,5 +1,6 @@ #!/bin/bash # Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# 2019 Xiaohui Zhang # Apache 2.0 @@ -13,6 +14,9 @@ cmd=run.pl scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" num_iters=40 # Number of iterations of training max_iter_inc=30 # Last iter to increase #Gauss on. +initial_beam=6 # beam used in the first iteration (set smaller to speed up initialization) +regular_beam=10 # beam used after the first iteration +retry_beam=40 totgauss=1000 # Target #Gaussians. careful=false boost_silence=1.0 # Factor by which to boost silence likelihoods in alignment @@ -105,8 +109,7 @@ if [ $stage -le 0 ]; then rm $dir/0.*.acc fi - -beam=6 # will change to 10 below after 1st pass +beam=$initial_beam # will change to regular_beam below after 1st pass # note: using slightly wider beams for WSJ vs. RM. x=1 while [ $x -lt $num_iters ]; do @@ -116,7 +119,7 @@ while [ $x -lt $num_iters ]; do echo "$0: Aligning data" mdl="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $dir/$x.mdl - |" $cmd JOB=1:$nj $dir/log/align.$x.JOB.log \ - gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$[$beam*4] --careful=$careful "$mdl" \ + gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam --careful=$careful "$mdl" \ "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" "ark,t:|gzip -c >$dir/ali.JOB.gz" \ || exit 1; fi @@ -132,7 +135,7 @@ while [ $x -lt $num_iters ]; do if [ $x -le $max_iter_inc ]; then numgauss=$[$numgauss+$incgauss]; fi - beam=10 + beam=$regular_beam x=$[$x+1] done From f856ac2c4cd0da3c7df4aab65a1eace387dd60b7 Mon Sep 17 00:00:00 2001 From: saikiranvalluri <41471921+saikiranvalluri@users.noreply.github.com> Date: Wed, 27 Feb 2019 15:36:32 +0530 Subject: [PATCH 031/235] Update run.sh --- egs/fisher_callhome_spanish/s5_gigaword/run.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/run.sh b/egs/fisher_callhome_spanish/s5_gigaword/run.sh index 6e2ee9d4f25..bd553fc720e 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/run.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/run.sh @@ -88,10 +88,9 @@ if [ $stage -le 0 ]; then cp "$rnnlm_workdir"/normalised_gigaword_corpus/text_normalized "$rnnlm_workdir"/text_lm/spanish_gigaword_normalised.txt fi - +num_words_pocolm=110000 if [ $stage -le 1 ]; then - num_words_pocolm=110000 - local/train_pocolm.sh --stage $lmstage --num-words-pocolm 110000 "$rnnlm_workdir"/text_lm/ "$rnnlm_workdir"/pocolm + local/train_pocolm.sh --stage $lmstage --num-words-pocolm $num_words_pocolm "$rnnlm_workdir"/text_lm/ "$rnnlm_workdir"/pocolm local/get_rnnlm_wordlist.py data/lang/words.txt "$rnnlm_workdir"/pocolm/lm/"$num_words_pocolm"_3.pocolm/words.txt \ "$rnnlm_workdir"/rnnlm_wordlist fi From c0a555e4ce3a8681f0f8bd12775c64fbd5eab9a9 Mon Sep 17 00:00:00 2001 From: rezame <36230722+rezame@users.noreply.github.com> Date: Wed, 27 Feb 2019 22:53:51 +0330 Subject: [PATCH 032/235] [scripts] Allow 
reverberate_data_dir.py to support unicode filenames (#3060) --- egs/wsj/s5/steps/data/reverberate_data_dir.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py index 189f4619ddb..b1745a4b723 100755 --- a/egs/wsj/s5/steps/data/reverberate_data_dir.py +++ b/egs/wsj/s5/steps/data/reverberate_data_dir.py @@ -371,8 +371,8 @@ def GenerateReverberatedWavScp(wav_scp, # a dictionary whose values are the Kal # This function replicate the entries in files like segments, utt2spk, text def AddPrefixToFields(input_file, output_file, num_replicas, include_original, prefix, field = [0]): - list = [x.strip() for x in open(input_file)] - f = open(output_file, "w") + list = [x.strip() for x in open(input_file, encoding='utf-8')] + f = open(output_file, "w" ,encoding='utf-8') if include_original: start_index = 0 else: From 684f029e77da3426c59e3b4106ce6b45160de088 Mon Sep 17 00:00:00 2001 From: saikiranvalluri Date: Thu, 28 Feb 2019 11:57:29 +0000 Subject: [PATCH 033/235] Text cleaning script for splitting Abbreviation words added --- .../s5_gigaword/local/clean_abbrevs_text.py | 33 +++++++++++++++++++ .../s5_gigaword/local/run_norm.sh | 3 ++ scripts/rnnlm/choose_features.py | 12 ++----- scripts/rnnlm/get_best_model.py | 28 ++++++++-------- scripts/rnnlm/get_embedding_dim.py | 4 +-- scripts/rnnlm/get_num_splits.sh | 2 +- scripts/rnnlm/get_special_symbol_opts.py | 8 ++--- scripts/rnnlm/get_unigram_probs.py | 18 ++++------ scripts/rnnlm/get_vocab.py | 11 +++---- scripts/rnnlm/get_word_features.py | 15 ++++----- scripts/rnnlm/lmrescore.sh | 6 ---- scripts/rnnlm/lmrescore_nbest.sh | 4 +-- scripts/rnnlm/lmrescore_pruned.sh | 17 +++------- scripts/rnnlm/prepare_rnnlm_dir.sh | 9 ++--- scripts/rnnlm/prepare_split_data.py | 13 +++----- scripts/rnnlm/rnnlm_cleanup.py | 2 +- scripts/rnnlm/show_word_features.py | 19 +++-------- scripts/rnnlm/train_rnnlm.sh | 2 +- scripts/rnnlm/validate_features.py | 7 ++-- scripts/rnnlm/validate_text_dir.py | 11 +++---- scripts/rnnlm/validate_word_features.py | 11 +++---- 21 files changed, 104 insertions(+), 131 deletions(-) create mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/clean_abbrevs_text.py diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/clean_abbrevs_text.py b/egs/fisher_callhome_spanish/s5_gigaword/local/clean_abbrevs_text.py new file mode 100644 index 00000000000..22fc54f18cc --- /dev/null +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/clean_abbrevs_text.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# 2018 Saikiran Valluri, GoVivace inc., + +import os, sys +import re +import codecs + +if len(sys.argv) < 3: + print("Usage : python clean_abbrevs_text.py ") + print(" Processes the text before text normalisation to convert uppercase words as space separated letters") + sys.exit() + +inputfile=codecs.open(sys.argv[1], encoding='utf-8') +outputfile=codecs.open(sys.argv[2], encoding='utf-8', mode='w+') + +for line in inputfile: + words = line.split() + textout = "" + wordcnt = 0 + for word in words: + if re.match(r"\b([A-ZÂÁÀÄÊÉÈËÏÍÎÖÓÔÖÚÙÛÑÇ])+[']?s?\b", word) and wordcnt>0: + print(word) + word = re.sub('\'?s', 's', word) + textout = textout + " ".join(word) + " " + else: + textout = textout + word + " " + wordcnt = wordcnt + 1 + outputfile.write(textout.strip()+ '\n') + +inputfile.close() +outputfile.close() diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/run_norm.sh 
b/egs/fisher_callhome_spanish/s5_gigaword/local/run_norm.sh index 4a26f6857b8..f88fecc815c 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/run_norm.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/run_norm.sh @@ -24,7 +24,10 @@ for i in "${punctuation_symbols[@]}"; do num_syms=$((num_syms+1)) done mkdir -p $dir/normalize/$job +local/clean_abbrevs_text.py $data/$job $data/"$job"_processed +mv $data/"$job"_processed $data/$job echo "cat $data/$job | $substitute_arg" > $dir/normalize/$job/substitute.sh + bash $dir/normalize/$job/substitute.sh | \ sed "s: 's:'s:g" | sed "s: 'm:'m:g" | \ sed "s: \s*: :g" | tr 'A-ZÂÁÀÄÊÉÈËÏÍÎÖÓÔÖÚÙÛÑÇ' 'a-zâáàäêéèëïíîöóôöúùûñç' > $dir/normalize/$job/text diff --git a/scripts/rnnlm/choose_features.py b/scripts/rnnlm/choose_features.py index c6621e04494..799f6b6dcc8 100755 --- a/scripts/rnnlm/choose_features.py +++ b/scripts/rnnlm/choose_features.py @@ -10,12 +10,6 @@ from collections import defaultdict sys.stdout = open(1, 'w', encoding='utf-8', closefd=False) -# because this script splits inside words, we cannot use latin-1; we actually need to know what -# what the encoding is. By default we make this utf-8; to handle encodings that are not compatible -# with utf-8 (e.g. gbk), we'll eventually have to make the encoding an option to this script. - -import re -tab_or_space = re.compile('[ \t]+') parser = argparse.ArgumentParser(description="This script chooses the sparse feature representation of words. " "To be more specific, it chooses the set of features-- you compute " @@ -90,9 +84,9 @@ # and 'wordlist' is a list indexed by integer id, that returns the string-valued word. def read_vocab(vocab_file): vocab = {} - with open(vocab_file, 'r', encoding="utf-8") as f: + with open(vocab_file, 'r', encoding="utf-8", errors='replace') as f: for line in f: - fields = re.split(tab_or_space, line) + fields = line.split() assert len(fields) == 2 if fields[0] in vocab: sys.exit(sys.argv[0] + ": duplicated word({0}) in vocab: {1}" @@ -121,7 +115,7 @@ def read_unigram_probs(unigram_probs_file): unigram_probs = [] with open(unigram_probs_file, 'r', encoding="utf-8") as f: for line in f: - fields = re.split(tab_or_space, line) + fields = line.split() assert len(fields) == 2 idx = int(fields[0]) if idx >= len(unigram_probs): diff --git a/scripts/rnnlm/get_best_model.py b/scripts/rnnlm/get_best_model.py index 333ed8dbfc7..45487b18b0c 100755 --- a/scripts/rnnlm/get_best_model.py +++ b/scripts/rnnlm/get_best_model.py @@ -3,14 +3,14 @@ # Copyright 2017 Johns Hopkins University (author: Daniel Povey) # License: Apache 2.0. +import os import argparse -import glob -import re import sys +import re parser = argparse.ArgumentParser(description="Works out the best iteration of RNNLM training " - "based on dev-set perplexity, and prints the number corresponding " - "to that iteration", + "based on dev-set perplexity, and prints the number corresponding " + "to that iteration", epilog="E.g. 
" + sys.argv[0] + " exp/rnnlm_a", formatter_class=argparse.ArgumentDefaultsHelpFormatter) @@ -19,9 +19,10 @@ args = parser.parse_args() -num_iters = None + +num_iters=None try: - with open(args.rnnlm_dir + "/info.txt", encoding="latin-1") as f: + with open(args.rnnlm_dir + "/info.txt", encoding="utf-8") as f: for line in f: a = line.split("=") if a[0] == "num_iters": @@ -35,15 +36,15 @@ sys.exit(sys.argv[0] + ": could not get num_iters from {0}/info.txt".format( args.rnnlm_dir)) -best_objf = -2000 -best_iter = -1 -for i in range(1, num_iters): +best_objf=-2000 +best_iter=-1 +for i in range(num_iters): this_logfile = "{0}/log/compute_prob.{1}.log".format(args.rnnlm_dir, i) try: - f = open(this_logfile, 'r', encoding='latin-1') + f = open(this_logfile, 'r', encoding='utf-8') except: sys.exit(sys.argv[0] + ": could not open log-file {0}".format(this_logfile)) - this_objf = -1000 + this_objf=-1000 for line in f: m = re.search('Overall objf .* (\S+)$', str(line)) if m is not None: @@ -52,10 +53,6 @@ except Exception as e: sys.exit(sys.argv[0] + ": line in file {0} could not be parsed: {1}, error is: {2}".format( this_logfile, line, str(e))) - # verify this iteration still has model files present - if len(glob.glob("{0}/{1}.raw".format(args.rnnlm_dir, i))) == 0: - # this iteration has log files, but model files have been cleaned up, skip it - continue if this_objf == -1000: print(sys.argv[0] + ": warning: could not parse objective function from {0}".format( this_logfile), file=sys.stderr) @@ -66,4 +63,5 @@ if best_iter == -1: sys.exit(sys.argv[0] + ": error: could not get best iteration.") + print(str(best_iter)) diff --git a/scripts/rnnlm/get_embedding_dim.py b/scripts/rnnlm/get_embedding_dim.py index 63eaf307498..b6810ef2cbf 100755 --- a/scripts/rnnlm/get_embedding_dim.py +++ b/scripts/rnnlm/get_embedding_dim.py @@ -45,7 +45,7 @@ left_context=0 right_context=0 for line in out_lines: - line = line.decode('latin-1') + line = line.decode('utf-8') m = re.search(r'input-node name=input dim=(\d+)', line) if m is not None: try: @@ -101,4 +101,4 @@ "nnet '{0}': {1} != {2}".format( args.nnet, input_dim, output_dim)) -print('{}'.format(input_dim)) +print(str(input_dim)) diff --git a/scripts/rnnlm/get_num_splits.sh b/scripts/rnnlm/get_num_splits.sh index 974fd8bf204..93d1f7f169c 100755 --- a/scripts/rnnlm/get_num_splits.sh +++ b/scripts/rnnlm/get_num_splits.sh @@ -65,7 +65,7 @@ tot_with_multiplicities=0 for f in $text/*.counts; do if [ "$f" != "$text/dev.counts" ]; then - this_tot=$(cat $f | awk '{tot += $2} END{printf("%d", tot)}') + this_tot=$(cat $f | awk '{tot += $2} END{print tot}') if ! 
[ $this_tot -gt 0 ]; then echo "$0: there were no counts in counts file $f" 1>&2 exit 1 diff --git a/scripts/rnnlm/get_special_symbol_opts.py b/scripts/rnnlm/get_special_symbol_opts.py index 4310b116ad7..13fe497faf9 100755 --- a/scripts/rnnlm/get_special_symbol_opts.py +++ b/scripts/rnnlm/get_special_symbol_opts.py @@ -8,9 +8,6 @@ import argparse import sys -import re -tab_or_space = re.compile('[ \t]+') - parser = argparse.ArgumentParser(description="This script checks whether the special symbols " "appear in words.txt with expected values, if not, it will " "print out the options with correct value to stdout, which may look like " @@ -28,10 +25,9 @@ lower_ids = {} upper_ids = {} -input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='latin-1') +input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8', errors='replace') for line in input_stream: - fields = re.split(tab_or_space, line) - assert(len(fields) == 2) + fields = line.split() sym = fields[0] if sym in special_symbols: assert sym not in lower_ids diff --git a/scripts/rnnlm/get_unigram_probs.py b/scripts/rnnlm/get_unigram_probs.py index ab3f9bb382f..32b01728ca3 100755 --- a/scripts/rnnlm/get_unigram_probs.py +++ b/scripts/rnnlm/get_unigram_probs.py @@ -7,9 +7,6 @@ import argparse import sys -import re -tab_or_space = re.compile('[ \t]+') - parser = argparse.ArgumentParser(description="This script gets the unigram probabilities of words.", epilog="E.g. " + sys.argv[0] + " --vocab-file=data/rnnlm/vocab/words.txt " "--data-weights-file=exp/rnnlm/data_weights.txt data/rnnlm/data " @@ -77,10 +74,10 @@ def get_all_data_sources_except_dev(text_dir): # value is a tuple (repeated_times_per_epoch, weight) def read_data_weights(weights_file, data_sources): data_weights = {} - with open(weights_file, 'r', encoding="latin-1") as f: + with open(weights_file, 'r', encoding="utf-8", errors='replace') as f: for line in f: try: - fields = re.split(tab_or_space, line) + fields = line.split() assert len(fields) == 3 if fields[0] in data_weights: raise Exception("duplicated data source({0}) specified in " @@ -102,9 +99,9 @@ def read_data_weights(weights_file, data_sources): # return the vocab, which is a dict mapping the word to a integer id. 
def read_vocab(vocab_file): vocab = {} - with open(vocab_file, 'r', encoding="latin-1") as f: + with open(vocab_file, 'r', encoding="utf-8", errors='replace') as f: for line in f: - fields = re.split(tab_or_space, line) + fields = line.split() assert len(fields) == 2 if fields[0] in vocab: sys.exit(sys.argv[0] + ": duplicated word({0}) in vocab: {1}" @@ -131,11 +128,10 @@ def get_counts(data_sources, data_weights, vocab): if weight == 0.0: continue - with open(counts_file, 'r', encoding="latin-1") as f: + with open(counts_file, 'r', encoding="utf-8", errors='replace') as f: for line in f: - fields = re.split(tab_or_space, line) - if len(fields) != 2: print("Warning, should be 2 cols:", fields, line, file=sys.stderr); - assert(len(fields) == 2) + fields = line.split() + assert len(fields) == 2 word = fields[0] count = fields[1] if word not in vocab: diff --git a/scripts/rnnlm/get_vocab.py b/scripts/rnnlm/get_vocab.py index 1502e915f9c..f290ef721c1 100755 --- a/scripts/rnnlm/get_vocab.py +++ b/scripts/rnnlm/get_vocab.py @@ -6,10 +6,7 @@ import os import argparse import sys -sys.stdout = open(1, 'w', encoding='latin-1', closefd=False) - -import re -tab_or_space = re.compile('[ \t]+') +sys.stdout = open(1, 'w', encoding='utf-8', closefd=False) parser = argparse.ArgumentParser(description="This script get a vocab from unigram counts " "of words produced by get_unigram_counts.sh", @@ -28,10 +25,10 @@ # Add the count for every word in counts_file # the result is written into word_counts def add_counts(word_counts, counts_file): - with open(counts_file, 'r', encoding="latin-1") as f: + with open(counts_file, 'r', encoding="utf-8") as f: for line in f: - line = line.strip(" \t\r\n") - word_and_count = re.split(tab_or_space, line) + line = line.strip() + word_and_count = line.split() assert len(word_and_count) == 2 if word_and_count[0] in word_counts: word_counts[word_and_count[0]] += int(word_and_count[1]) diff --git a/scripts/rnnlm/get_word_features.py b/scripts/rnnlm/get_word_features.py index aeb7a3ec6ae..8bdb553b9c8 100755 --- a/scripts/rnnlm/get_word_features.py +++ b/scripts/rnnlm/get_word_features.py @@ -9,9 +9,6 @@ import math from collections import defaultdict -import re -tab_or_space = re.compile('[ \t]+') - parser = argparse.ArgumentParser(description="This script turns the words into the sparse feature representation, " "using features from rnnlm/choose_features.py.", epilog="E.g. " + sys.argv[0] + " --unigram-probs=exp/rnnlm/unigram_probs.txt " @@ -41,9 +38,9 @@ # return the vocab, which is a dict mapping the word to a integer id. 
def read_vocab(vocab_file): vocab = {} - with open(vocab_file, 'r', encoding="latin-1") as f: + with open(vocab_file, 'r', encoding="utf-8", errors='replace') as f: for line in f: - fields = re.split(tab_or_space, line) + fields = line.split() assert len(fields) == 2 if fields[0] in vocab: sys.exit(sys.argv[0] + ": duplicated word({0}) in vocab: {1}" @@ -62,9 +59,9 @@ def read_vocab(vocab_file): # return a list of unigram_probs, indexed by word id def read_unigram_probs(unigram_probs_file): unigram_probs = [] - with open(unigram_probs_file, 'r', encoding="latin-1") as f: + with open(unigram_probs_file, 'r', encoding="utf-8", errors='replace') as f: for line in f: - fields = re.split(tab_or_space, line) + fields = line.split() assert len(fields) == 2 idx = int(fields[0]) if idx >= len(unigram_probs): @@ -103,9 +100,9 @@ def read_features(features_file): feats['min_ngram_order'] = 10000 feats['max_ngram_order'] = -1 - with open(features_file, 'r', encoding="latin-1") as f: + with open(features_file, 'r', encoding="utf-8", errors='replace') as f: for line in f: - fields = re.split(tab_or_space, line) + fields = line.split() assert(len(fields) in [3, 4, 5]) feat_id = int(fields[0]) diff --git a/scripts/rnnlm/lmrescore.sh b/scripts/rnnlm/lmrescore.sh index 9da22ae75a2..cd0cf793d8d 100755 --- a/scripts/rnnlm/lmrescore.sh +++ b/scripts/rnnlm/lmrescore.sh @@ -72,12 +72,6 @@ awk -v n=$0 -v w=$weight 'BEGIN {if (w < 0 || w > 1) { print n": Interpolation weight should be in the range of [0, 1]"; exit 1;}}' \ || exit 1; -if ! head -n -1 $rnnlm_dir/config/words.txt | cmp $oldlang/words.txt -; then - # the last word of the RNNLM word list is an added word - echo "$0: Word lists mismatch for lattices and RNNLM." - exit 1 -fi - oldlm_command="fstproject --project_output=true $oldlm |" special_symbol_opts=$(cat $rnnlm_dir/special_symbol_opts.txt) diff --git a/scripts/rnnlm/lmrescore_nbest.sh b/scripts/rnnlm/lmrescore_nbest.sh index 58b19b9fa79..f50a3c909f0 100755 --- a/scripts/rnnlm/lmrescore_nbest.sh +++ b/scripts/rnnlm/lmrescore_nbest.sh @@ -29,7 +29,7 @@ if [ $# != 6 ]; then echo "This version applies an RNNLM and mixes it with the LM scores" echo "previously in the lattices., controlled by the first parameter (rnnlm-weight)" echo "" - echo "Usage: $0 [options] " + echo "Usage: utils/rnnlmrescore.sh " echo "Main options:" echo " --inv-acwt # default 12. e.g. --inv-acwt 17. Equivalent to LM scale to use." echo " # for N-best list generation... note, we'll score at different acwt's" @@ -177,7 +177,7 @@ fi if [ $stage -le 6 ]; then echo "$0: invoking rnnlm/compute_sentence_scores.sh which calls rnnlm to get RNN LM scores." $cmd JOB=1:$nj $dir/log/rnnlm_compute_scores.JOB.log \ - rnnlm/compute_sentence_scores.sh $rnndir $adir.JOB/temp \ + local/rnnlm/compute_sentence_scores.sh $rnndir $adir.JOB/temp \ $adir.JOB/words_text $adir.JOB/lmwt.rnn fi if [ $stage -le 7 ]; then diff --git a/scripts/rnnlm/lmrescore_pruned.sh b/scripts/rnnlm/lmrescore_pruned.sh index 9ba78415708..46ee5846424 100755 --- a/scripts/rnnlm/lmrescore_pruned.sh +++ b/scripts/rnnlm/lmrescore_pruned.sh @@ -16,18 +16,16 @@ max_ngram_order=4 # Approximate the lattice-rescoring by limiting the max-ngram- # the same ngram history and this prevents the lattice from # exploding exponentially. Details of the n-gram approximation # method are described in section 2.3 of the paper - # http://www.danielpovey.com/files/2018_icassp_lattice_pruning.pdf -max_arcs= # limit the max arcs in lattice while rescoring. 
E.g., 20000 + # http://www.danielpovey.com/files/2018_icassp_lattice_pruning.pdm +max_arcs=499 # limit the max arcs in lattice while rescoring. E.g., 20000 -acwt=0.1 -weight=0.5 # Interpolation weight for RNNLM. +acwt=1 +weight=1 # Interpolation weight for RNNLM. normalize=false # If true, we add a normalization step to the output of the RNNLM # so that it adds up to *exactly* 1. Note that this is not necessary # as in our RNNLM setup, a properly trained network would automatically # have its normalization term close to 1. The details of this # could be found at http://www.danielpovey.com/files/2018_icassp_rnnlm.pdf -lattice_prune_beam=4 # Beam used in pruned lattice composition - # This option affects speed and how large the composed lattice may be # End configuration section. @@ -75,12 +73,6 @@ awk -v n=$0 -v w=$weight 'BEGIN {if (w < 0 || w > 1) { print n": Interpolation weight should be in the range of [0, 1]"; exit 1;}}' \ || exit 1; -if ! head -n -1 $rnnlm_dir/config/words.txt | cmp $oldlang/words.txt -; then - # the last word of the RNNLM word list is an added word - echo "$0: Word lists mismatch for lattices and RNNLM." - exit 1 -fi - normalize_opt= if $normalize; then normalize_opt="--normalize-probs=true" @@ -105,7 +97,6 @@ cp $indir/num_jobs $outdir $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \ lattice-lmrescore-kaldi-rnnlm-pruned --lm-scale=$weight $special_symbol_opts \ - --lattice-compose-beam=$lattice_prune_beam \ --acoustic-scale=$acwt --max-ngram-order=$max_ngram_order $normalize_opt $max_arcs_opt \ $carpa_option $oldlm $word_embedding "$rnnlm_dir/final.raw" \ "ark:gunzip -c $indir/lat.JOB.gz|" "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1; diff --git a/scripts/rnnlm/prepare_rnnlm_dir.sh b/scripts/rnnlm/prepare_rnnlm_dir.sh index e101822d983..d3ee44f1f95 100755 --- a/scripts/rnnlm/prepare_rnnlm_dir.sh +++ b/scripts/rnnlm/prepare_rnnlm_dir.sh @@ -23,7 +23,7 @@ if [ $# != 3 ]; then echo "Usage: $0 [options] " echo "Sets up the directory for RNNLM training as done by" echo "rnnlm/train_rnnlm.sh, and initializes the model." - echo " is as validated by rnnlm/validate_text_dir.py" + echo " is as validated by rnnlm/validate_data_dir.py" echo " is as validated by rnnlm/validate_config_dir.sh." exit 1 fi @@ -34,7 +34,6 @@ config_dir=$2 dir=$3 set -e -. ./path.sh if [ $stage -le 0 ]; then echo "$0: validating input" @@ -53,13 +52,9 @@ if [ $stage -le 1 ]; then echo "$0: copying config directory" mkdir -p $dir/config # copy expected things from $config_dir to $dir/config. - for f in words.txt data_weights.txt oov.txt xconfig; do + for f in words.txt features.txt data_weights.txt oov.txt xconfig; do cp $config_dir/$f $dir/config done - # features.txt is optional, check separately - if [ -f $config_dir/features.txt ]; then - cp $config_dir/features.txt $dir/config - fi fi rnnlm/get_special_symbol_opts.py < $dir/config/words.txt > $dir/special_symbol_opts.txt diff --git a/scripts/rnnlm/prepare_split_data.py b/scripts/rnnlm/prepare_split_data.py index cceac48313e..9cc4f69d09f 100755 --- a/scripts/rnnlm/prepare_split_data.py +++ b/scripts/rnnlm/prepare_split_data.py @@ -8,9 +8,6 @@ import argparse import sys -import re -tab_or_space = re.compile('[ \t]+') - parser = argparse.ArgumentParser(description="This script prepares files containing integerized text, " "for consumption by nnet3-get-egs.", epilog="E.g. 
" + sys.argv[0] + " --vocab-file=data/rnnlm/vocab/words.txt " @@ -66,10 +63,10 @@ def get_all_data_sources_except_dev(text_dir): # value is a tuple (repeated_times_per_epoch, weight) def read_data_weights(weights_file, data_sources): data_weights = {} - with open(weights_file, 'r', encoding="latin-1") as f: + with open(weights_file, 'r', encoding="utf-8") as f: for line in f: try: - fields = re.split(tab_or_space, line) + fields = line.split() assert len(fields) == 3 if fields[0] in data_weights: raise Exception("duplicated data source({0}) specified in " @@ -97,7 +94,7 @@ def distribute_to_outputs(source_filename, weight, output_filehandles): num_outputs = len(output_filehandles) n = 0 try: - f = open(source_filename, 'r', encoding="latin-1") + f = open(source_filename, 'r', encoding="utf-8") except Exception as e: sys.exit(sys.argv[0] + ": failed to open file {0} for reading: {1} ".format( source_filename, str(e))) @@ -124,7 +121,7 @@ def distribute_to_outputs(source_filename, weight, output_filehandles): os.makedirs(args.split_dir + "/info") # set up the 'num_splits' file, which contains an integer. -with open("{0}/info/num_splits".format(args.split_dir), 'w', encoding="latin-1") as f: +with open("{0}/info/num_splits".format(args.split_dir), 'w', encoding="utf-8") as f: print(args.num_splits, file=f) # e.g. set temp_files = [ 'foo/1.tmp', 'foo/2.tmp', ..., 'foo/5.tmp' ] @@ -136,7 +133,7 @@ def distribute_to_outputs(source_filename, weight, output_filehandles): temp_filehandles = [] for fname in temp_files: try: - temp_filehandles.append(open(fname, 'w', encoding="latin-1")) + temp_filehandles.append(open(fname, 'w', encoding="utf-8")) except Exception as e: sys.exit(sys.argv[0] + ": failed to open file: " + str(e) + ".. if this is a max-open-filehandles limitation, you may " diff --git a/scripts/rnnlm/rnnlm_cleanup.py b/scripts/rnnlm/rnnlm_cleanup.py index 40cbee7a496..6a304f7f4cb 100644 --- a/scripts/rnnlm/rnnlm_cleanup.py +++ b/scripts/rnnlm/rnnlm_cleanup.py @@ -69,7 +69,7 @@ def get_compute_prob_info(log_file): compute_prob_done = False # roughly based on code in get_best_model.py try: - f = open(log_file, "r", encoding="latin-1") + f = open(log_file, "r", encoding="utf-8") except: print(script_name + ": warning: compute_prob log not found for iteration " + str(iter) + ". Skipping", diff --git a/scripts/rnnlm/show_word_features.py b/scripts/rnnlm/show_word_features.py index 89b134adaf9..89d84d53f3e 100755 --- a/scripts/rnnlm/show_word_features.py +++ b/scripts/rnnlm/show_word_features.py @@ -6,16 +6,7 @@ import os import argparse import sys - -# The use of latin-1 encoding does not preclude reading utf-8. latin-1 encoding -# means "treat words as sequences of bytes", and it is compatible with utf-8 -# encoding as well as other encodings such as gbk, as long as the spaces are -# also spaces in ascii (which we check). It is basically how we emulate the -# behavior of python before python3. -sys.stdout = open(1, 'w', encoding='latin-1', closefd=False) - -import re -tab_or_space = re.compile('[ \t]+') +sys.stdout = open(1, 'w', encoding='utf-8', closefd=False) parser = argparse.ArgumentParser(description="This script turns the word features to a human readable format.", epilog="E.g. 
" + sys.argv[0] + "exp/rnnlm/word_feats.txt exp/rnnlm/features.txt " @@ -36,9 +27,9 @@ def read_feature_type_and_key(features_file): feat_types = {} - with open(features_file, 'r', encoding="latin-1") as f: + with open(features_file, 'r', encoding="utf-8") as f: for line in f: - fields = re.split(tab_or_space, line) + fields = line.split() assert(len(fields) in [2, 3, 4]) feat_id = int(fields[0]) @@ -53,9 +44,9 @@ def read_feature_type_and_key(features_file): feat_type_and_key = read_feature_type_and_key(args.features_file) num_word_feats = 0 -with open(args.word_features_file, 'r', encoding="latin-1") as f: +with open(args.word_features_file, 'r', encoding="utf-8") as f: for line in f: - fields = re.split(tab_or_space, line) + fields = line.split() assert len(fields) % 2 == 1 print(int(fields[0]), end='\t') diff --git a/scripts/rnnlm/train_rnnlm.sh b/scripts/rnnlm/train_rnnlm.sh index 013e9a56c2f..f056d096120 100755 --- a/scripts/rnnlm/train_rnnlm.sh +++ b/scripts/rnnlm/train_rnnlm.sh @@ -41,7 +41,7 @@ use_gpu_for_diagnostics=false # set true to use GPU for compute_prob_*.log # optional cleanup options cleanup=false # add option --cleanup true to enable automatic cleanup of old models cleanup_strategy="keep_latest" # determines cleanup strategy, use either "keep_latest" or "keep_best" -cleanup_keep_iters=3 # number of iterations that will have their models retained +cleanup_keep_iters=100 # number of iterations that will have their models retained trap 'for pid in $(jobs -pr); do kill -KILL $pid; done' INT QUIT TERM . utils/parse_options.sh diff --git a/scripts/rnnlm/validate_features.py b/scripts/rnnlm/validate_features.py index 2a077da4758..a650092b086 100755 --- a/scripts/rnnlm/validate_features.py +++ b/scripts/rnnlm/validate_features.py @@ -7,9 +7,6 @@ import argparse import sys -import re -tab_or_space = re.compile('[ \t]+') - parser = argparse.ArgumentParser(description="Validates features file, produced by rnnlm/choose_features.py.", epilog="E.g. " + sys.argv[0] + " exp/rnnlm/features.txt", formatter_class=argparse.ArgumentDefaultsHelpFormatter) @@ -24,7 +21,7 @@ if not os.path.isfile(args.features_file): sys.exit(sys.argv[0] + ": Expected file {0} to exist".format(args.features_file)) -with open(args.features_file, 'r', encoding="latin-1") as f: +with open(args.features_file, 'r', encoding="utf-8") as f: has_unigram = False has_length = False idx = 0 @@ -33,7 +30,7 @@ final_feats = {} word_feats = {} for line in f: - fields = re.split(tab_or_space, line) + fields = line.split() assert(len(fields) in [3, 4, 5]) assert idx == int(fields[0]) diff --git a/scripts/rnnlm/validate_text_dir.py b/scripts/rnnlm/validate_text_dir.py index 903e720bdf4..d644d77911e 100755 --- a/scripts/rnnlm/validate_text_dir.py +++ b/scripts/rnnlm/validate_text_dir.py @@ -7,9 +7,6 @@ import argparse import sys -import re -tab_or_space = re.compile('[ \t]+') - parser = argparse.ArgumentParser(description="Validates data directory containing text " "files from one or more data sources, including dev.txt.", epilog="E.g. 
" + sys.argv[0] + " data/rnnlm/data", @@ -40,7 +37,7 @@ def check_text_file(text_file): - with open(text_file, 'r', encoding="latin-1") as f: + with open(text_file, 'r', encoding="utf-8") as f: found_nonempty_line = False lineno = 0 if args.allow_internal_eos == 'true': @@ -54,7 +51,7 @@ def check_text_file(text_file): lineno += 1 if args.spot_check == 'true' and lineno > 10: break - words = re.split(tab_or_space, line) + words = line.split() if len(words) != 0: found_nonempty_line = True for word in words: @@ -76,9 +73,9 @@ def check_text_file(text_file): # with some kind of utterance-id first_field_set = set() other_fields_set = set() - with open(text_file, 'r', encoding="latin-1") as f: + with open(text_file, 'r', encoding="utf-8") as f: for line in f: - array = re.split(tab_or_space, line) + array = line.split() if len(array) > 0: first_word = array[0] if first_word in first_field_set or first_word in other_fields_set: diff --git a/scripts/rnnlm/validate_word_features.py b/scripts/rnnlm/validate_word_features.py index 205b934ae1b..3dc9b23aa41 100755 --- a/scripts/rnnlm/validate_word_features.py +++ b/scripts/rnnlm/validate_word_features.py @@ -7,9 +7,6 @@ import argparse import sys -import re -tab_or_space = re.compile('[ \t]+') - parser = argparse.ArgumentParser(description="Validates word features file, produced by rnnlm/get_word_features.py.", epilog="E.g. " + sys.argv[0] + " --features-file=exp/rnnlm/features.txt " "exp/rnnlm/word_feats.txt", @@ -28,9 +25,9 @@ unigram_feat_id = -1 length_feat_id = -1 max_feat_id = -1 -with open(args.features_file, 'r', encoding="latin-1") as f: +with open(args.features_file, 'r', encoding="utf-8") as f: for line in f: - fields = re.split(tab_or_space, line) + fields = line.split() assert(len(fields) in [3, 4, 5]) feat_id = int(fields[0]) @@ -52,9 +49,9 @@ if feat_id > max_feat_id: max_feat_id = feat_id -with open(args.word_features_file, 'r', encoding="latin-1") as f: +with open(args.word_features_file, 'r', encoding="utf-8") as f: for line in f: - fields = re.split(tab_or_space, line) + fields = line.split() assert len(fields) > 0 and len(fields) % 2 == 1 word_id = int(fields[0]) From 185da3aa1afd4b5dda886607a504b83394e8a13f Mon Sep 17 00:00:00 2001 From: saikiranvalluri <41471921+saikiranvalluri@users.noreply.github.com> Date: Thu, 28 Feb 2019 18:17:53 +0530 Subject: [PATCH 034/235] Update clean_txt_dir.sh --- .../s5_gigaword/local/clean_txt_dir.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/clean_txt_dir.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/clean_txt_dir.sh index 56891328a89..0f06c037080 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/clean_txt_dir.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/clean_txt_dir.sh @@ -6,7 +6,7 @@ stage=0 nj=500 -. ./path.sh +. ./path_venv.sh . ./cmd.sh . 
./utils/parse_options.sh @@ -38,7 +38,7 @@ if [ $stage -le 0 ]; then $train_cmd --max_jobs_run 100 JOB=1:$numsplits $outdir/sparrowhawk/log/JOB.log \ local/run_norm.sh \ sparrowhawk_configuration.ascii_proto \ - $SPARROWHAWK_ROOT/language-resources/en/sparrowhawk/ \ + $SPARROWHAWK_ROOT/language-resources/esp/sparrowhawk/ \ $outdir/data \ JOB \ $outdir/sparrowhawk/ From cb393c81f678b704aa14de2b0d304ce4191a1026 Mon Sep 17 00:00:00 2001 From: saikiranvalluri <41471921+saikiranvalluri@users.noreply.github.com> Date: Thu, 28 Feb 2019 18:22:12 +0530 Subject: [PATCH 035/235] Update clean_txt_dir.sh --- egs/fisher_callhome_spanish/s5_gigaword/local/clean_txt_dir.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/clean_txt_dir.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/clean_txt_dir.sh index 0f06c037080..60269c0ab7e 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/clean_txt_dir.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/clean_txt_dir.sh @@ -6,7 +6,7 @@ stage=0 nj=500 -. ./path_venv.sh +. ./path.sh . ./cmd.sh . ./utils/parse_options.sh From 18a9cb6fe0927fbda13311e0bb4399c3e495e9e2 Mon Sep 17 00:00:00 2001 From: saikiranvalluri <41471921+saikiranvalluri@users.noreply.github.com> Date: Thu, 28 Feb 2019 18:23:25 +0530 Subject: [PATCH 036/235] Update train_pocolm.sh --- egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh index c8adb79383e..964dd3bbcc5 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh @@ -4,7 +4,7 @@ stage=-2 num_words_pocolm=110000 prune_size=1000000 -. ./path_venv.sh +. ./path.sh . ./cmd.sh . ./utils/parse_options.sh From b023638357122da580ea41a8230b4e7ee2b5c69f Mon Sep 17 00:00:00 2001 From: saikiranvalluri <41471921+saikiranvalluri@users.noreply.github.com> Date: Thu, 28 Feb 2019 18:23:55 +0530 Subject: [PATCH 037/235] Update pocolm_cust.sh --- egs/fisher_callhome_spanish/s5_gigaword/local/pocolm_cust.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/pocolm_cust.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/pocolm_cust.sh index c6642f6fcf4..422db15937a 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/pocolm_cust.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/pocolm_cust.sh @@ -5,7 +5,7 @@ set -euo pipefail -. ./path_venv.sh +. 
./path.sh export POCOLM_ROOT=$(cd $KALDI_ROOT/tools/pocolm/; pwd -P) export PATH=$PATH:$POCOLM_ROOT/scripts From 46550f0c598d50df636e5c181611566a7b211085 Mon Sep 17 00:00:00 2001 From: saikiranvalluri Date: Thu, 28 Feb 2019 13:04:51 +0000 Subject: [PATCH 038/235] Cosmetic fixes --- .../s5_gigaword/local/clean_abbrevs_text.py | 7 ++++--- .../s5_gigaword/local/get_unigram_weights_vocab.py | 2 +- egs/fisher_callhome_spanish/s5_gigaword/path.sh | 2 +- .../s5_gigaword/path_venv.sh | 13 ------------- 4 files changed, 6 insertions(+), 18 deletions(-) delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/path_venv.sh diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/clean_abbrevs_text.py b/egs/fisher_callhome_spanish/s5_gigaword/local/clean_abbrevs_text.py index 22fc54f18cc..e5dfcd07a1c 100644 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/clean_abbrevs_text.py +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/clean_abbrevs_text.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- # # 2018 Saikiran Valluri, GoVivace inc., @@ -13,7 +13,7 @@ sys.exit() inputfile=codecs.open(sys.argv[1], encoding='utf-8') -outputfile=codecs.open(sys.argv[2], encoding='utf-8', mode='w+') +outputfile=codecs.open(sys.argv[2], encoding='utf-8', mode='w') for line in inputfile: words = line.split() @@ -26,7 +26,8 @@ textout = textout + " ".join(word) + " " else: textout = textout + word + " " - wordcnt = wordcnt + 1 + if word.isalpha(): + wordcnt = wordcnt + 1 outputfile.write(textout.strip()+ '\n') inputfile.close() diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/get_unigram_weights_vocab.py b/egs/fisher_callhome_spanish/s5_gigaword/local/get_unigram_weights_vocab.py index 43cf8392167..3ecd16772d7 100644 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/get_unigram_weights_vocab.py +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/get_unigram_weights_vocab.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- # # 2018 Saikiran Valluri, GoVivace inc. diff --git a/egs/fisher_callhome_spanish/s5_gigaword/path.sh b/egs/fisher_callhome_spanish/s5_gigaword/path.sh index 2fc3de37406..80edbbaf69a 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/path.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/path.sh @@ -10,4 +10,4 @@ export PATH=$SPARROWHAWK_ROOT/bin:$PATH export LC_ALL=C.UTF-8 export LANG=C.UTF-8 - +source ~/anaconda/bin/activate py36 diff --git a/egs/fisher_callhome_spanish/s5_gigaword/path_venv.sh b/egs/fisher_callhome_spanish/s5_gigaword/path_venv.sh deleted file mode 100755 index 80edbbaf69a..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/path_venv.sh +++ /dev/null @@ -1,13 +0,0 @@ -export KALDI_ROOT=`pwd`/../../../../kaldi -[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh -export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH -[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 -. 
$KALDI_ROOT/tools/config/common_path.sh -export LD_LIBRARY_PATH=/home/dpovey/libs - -export SPARROWHAWK_ROOT=$KALDI_ROOT/tools/sparrowhawk -export PATH=$SPARROWHAWK_ROOT/bin:$PATH -export LC_ALL=C.UTF-8 -export LANG=C.UTF-8 - -source ~/anaconda/bin/activate py36 From ce3c7d7a2169113fb6bb7fd0b395250f4f123c12 Mon Sep 17 00:00:00 2001 From: saikiranvalluri <41471921+saikiranvalluri@users.noreply.github.com> Date: Thu, 28 Feb 2019 18:41:36 +0530 Subject: [PATCH 039/235] Update path.sh --- egs/fisher_callhome_spanish/s5_gigaword/path.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/path.sh b/egs/fisher_callhome_spanish/s5_gigaword/path.sh index 80edbbaf69a..d2c2937d81e 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/path.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/path.sh @@ -9,5 +9,3 @@ export SPARROWHAWK_ROOT=$KALDI_ROOT/tools/sparrowhawk export PATH=$SPARROWHAWK_ROOT/bin:$PATH export LC_ALL=C.UTF-8 export LANG=C.UTF-8 - -source ~/anaconda/bin/activate py36 From deeaaa76ce6a89fd500a917f0793eaab93d63356 Mon Sep 17 00:00:00 2001 From: saikiranvalluri Date: Fri, 1 Mar 2019 07:22:40 -0500 Subject: [PATCH 040/235] Bug fix in text normalisation script for gigaword corpus --- .../s5_gigaword/local/clean_abbrevs_text.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/clean_abbrevs_text.py b/egs/fisher_callhome_spanish/s5_gigaword/local/clean_abbrevs_text.py index e5dfcd07a1c..a6edc0f92c5 100644 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/clean_abbrevs_text.py +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/clean_abbrevs_text.py @@ -20,14 +20,14 @@ textout = "" wordcnt = 0 for word in words: - if re.match(r"\b([A-ZÂÁÀÄÊÉÈËÏÍÎÖÓÔÖÚÙÛÑÇ])+[']?s?\b", word) and wordcnt>0: - print(word) - word = re.sub('\'?s', 's', word) - textout = textout + " ".join(word) + " " + if re.match(r"\b([A-ZÂÁÀÄÊÉÈËÏÍÎÖÓÔÖÚÙÛÑÇ])+[']?s?\b", word): + if wordcnt > 0: + word = re.sub('\'?s', 's', word) + textout = textout + " ".join(word) + " " + else: + textout = textout + word + " " else: - textout = textout + word + " " - if word.isalpha(): - wordcnt = wordcnt + 1 + if word.isalpha(): wordcnt = wordcnt + 1 outputfile.write(textout.strip()+ '\n') inputfile.close() From 633f21d33a53228ca870821ce6a2e5a432c4e9f6 Mon Sep 17 00:00:00 2001 From: saikiranvalluri <41471921+saikiranvalluri@users.noreply.github.com> Date: Fri, 1 Mar 2019 20:26:37 +0530 Subject: [PATCH 041/235] small Fix path.sh --- egs/fisher_callhome_spanish/s5_gigaword/path.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/path.sh b/egs/fisher_callhome_spanish/s5_gigaword/path.sh index d2c2937d81e..e622e7d5051 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/path.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/path.sh @@ -1,4 +1,4 @@ -export KALDI_ROOT=`pwd`/../../../../kaldi +export KALDI_ROOT=`pwd`/../../../ [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" 
&& exit 1 From 8d6b14d1f75c9f532ab945e1328c8d925cf21064 Mon Sep 17 00:00:00 2001 From: saikiranvalluri <41471921+saikiranvalluri@users.noreply.github.com> Date: Fri, 1 Mar 2019 21:17:29 +0530 Subject: [PATCH 042/235] Update clean_abbrevs_text.py --- .../s5_gigaword/local/clean_abbrevs_text.py | 1 + 1 file changed, 1 insertion(+) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/clean_abbrevs_text.py b/egs/fisher_callhome_spanish/s5_gigaword/local/clean_abbrevs_text.py index a6edc0f92c5..7d92eb9fe3a 100644 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/clean_abbrevs_text.py +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/clean_abbrevs_text.py @@ -27,6 +27,7 @@ else: textout = textout + word + " " else: + textout = textout + word + " " if word.isalpha(): wordcnt = wordcnt + 1 outputfile.write(textout.strip()+ '\n') From 8c9c37bad8eba62d20231dda0d34553a6ce12c1b Mon Sep 17 00:00:00 2001 From: saikiranvalluri Date: Fri, 1 Mar 2019 15:54:00 +0000 Subject: [PATCH 043/235] Added sparrowhawk installation script for text normalisation --- tools/install_sparrowhawk.sh | 73 ++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100755 tools/install_sparrowhawk.sh diff --git a/tools/install_sparrowhawk.sh b/tools/install_sparrowhawk.sh new file mode 100755 index 00000000000..f9bbcb1b28e --- /dev/null +++ b/tools/install_sparrowhawk.sh @@ -0,0 +1,73 @@ +#!/bin/bash +export LDFLAGS="-L`pwd`/openfst/lib" +export CXXFLAGS="-I`pwd`/openfst/include" +stage=0 + +if [ $stage -le 0 ] ; then + git clone -b feature/Spanish_normalizer https://github.com/spokencloud/sparrowhawk-resources.git || exit 1; + patch -p0 < sparrowhawk-resources/local/Makefile.patch || exit 1; + make openfst || exit 1; + git clone https://github.com/mjansche/thrax.git + export LDFLAGS=-L`pwd`/openfst/lib + export CXXFLAGS=-I`pwd`/openfst/include + cd thrax + autoreconf --force --install || exit 1; + ./configure --prefix=`pwd` || exit 1; + make || exit 1; + make install || exit 1; + cd .. + git clone https://github.com/google/re2.git || exit 1; + cd re2/ + make -j 20 || exit 1; + make test || exit 1; + make install prefix=`pwd` || exit 1; + cd .. + git clone https://github.com/google/protobuf.git || exit 1; + cd protobuf/ + ./autogen.sh || exit 1; + ./configure --prefix=`pwd` || exit 1; + make -j 20 || exit 1; + make install || exit 1; + cd .. +fi + +if [ $stage -le 1 ]; then + git clone https://github.com/google/sparrowhawk.git || exit 1; + patch -p0 < sparrowhawk-resources/local/sparrowhawk.patch || exit 1; + cd sparrowhawk/ || exit 1; + mkdir lib + mkdir bin + mkdir include + cp -r ../openfst/lib/* lib/ || exit 1; + cp -r ../protobuf/lib/* lib/ || exit 1; + cp -r ../re2/lib/* lib/ || exit 1; + cp -r ../thrax/lib/* lib/ || exit 1; + cp -r ../openfst/include/* include/ || exit 1; + cp -r ../protobuf/include/* include/ || exit 1; + cp -r ../re2/include/* include/ || exit 1; + cp -r ../thrax/include/* include/ || exit 1; + cp ../protobuf/bin/protoc bin/. || exit 1; + export PATH=`pwd`/bin:$PATH + aclocal || exit 1; + automake || exit 1; + ./configure --prefix=`pwd` CPPFLAGS="-I`pwd`/include" LDFLAGS="-L`pwd`/lib" || exit 1; + make || exit 1; + make install || exit 1; + cd .. +fi + +if [ $stage -le 2 ]; then + source ~/anaconda/bin/activate py27 || exit 1; + cp -r sparrowhawk-resources/language-resources sparrowhawk/ || exit 1; + cd sparrowhawk/language-resources/en/textnorm/classifier || exit 1; + . 
./path.sh || exit 1; + python create_far.py ascii.syms universal_depot_ascii universal_depot universal_depot.far + thraxmakedep tokenize_and_classify.grm || exit 1; + make || exit 1; + cd ../verbalizer + python create_far.py ascii.syms number_names_depot_ascii number_names_depot number_names_depot.far + cp -r ../classifier/universal_depot.far . + thraxmakedep verbalize.grm || exit 1; + make || exit 1; + cd ../../../../.. +fi From 2e26464accd93b6d9949406e2dbda9450273e5f1 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Fri, 1 Mar 2019 15:17:08 -0500 Subject: [PATCH 044/235] [scripts] Make some cleanup scripts work with python3 (#3054) --- .../steps/cleanup/internal/align_ctm_ref.py | 2 +- .../steps/cleanup/internal/get_ctm_edits.py | 10 +++---- .../cleanup/internal/get_non_scored_words.py | 4 +-- .../steps/cleanup/internal/get_pron_stats.py | 28 +++++++++---------- .../cleanup/internal/make_one_biased_lm.py | 12 ++++---- .../cleanup/internal/modify_ctm_edits.py | 8 +++--- .../cleanup/internal/segment_ctm_edits.py | 17 +++++------ egs/wsj/s5/steps/cleanup/make_biased_lms.py | 16 ++++++----- 8 files changed, 51 insertions(+), 46 deletions(-) diff --git a/egs/wsj/s5/steps/cleanup/internal/align_ctm_ref.py b/egs/wsj/s5/steps/cleanup/internal/align_ctm_ref.py index 848ca61ebe4..d3e012da13c 100755 --- a/egs/wsj/s5/steps/cleanup/internal/align_ctm_ref.py +++ b/egs/wsj/s5/steps/cleanup/internal/align_ctm_ref.py @@ -127,7 +127,7 @@ def read_text(text_file): "Did not get enough columns; line {0} in {1}" "".format(line, text_file.name)) elif len(parts) == 1: - logger.warn("Empty transcript for utterance %s in %s", + logger.warn("Empty transcript for utterance %s in %s", parts[0], text_file.name) yield parts[0], [] else: diff --git a/egs/wsj/s5/steps/cleanup/internal/get_ctm_edits.py b/egs/wsj/s5/steps/cleanup/internal/get_ctm_edits.py index a19c5344572..3032a4b434a 100755 --- a/egs/wsj/s5/steps/cleanup/internal/get_ctm_edits.py +++ b/egs/wsj/s5/steps/cleanup/internal/get_ctm_edits.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Copyright 2016 Vimal Manohar # 2016 Johns Hopkins University (author: Daniel Povey) @@ -116,17 +116,17 @@ def OpenFiles(): global ctm_edits_out, edits_in, ctm_in, symbol_table, oov_word try: - ctm_edits_out = open(args.ctm_edits_out, 'w') + ctm_edits_out = open(args.ctm_edits_out, 'w', encoding='utf-8') except: sys.exit("get_ctm_edits.py: error opening ctm-edits file {0} for output".format( args.ctm_edits_out)) try: - edits_in = open(args.edits_in) + edits_in = open(args.edits_in, encoding='utf-8') except: sys.exit("get_ctm_edits.py: error opening edits file {0} for input".format( args.edits_in)) try: - ctm_in = open(args.ctm_in) + ctm_in = open(args.ctm_in, encoding='utf-8') except: sys.exit("get_ctm_edits.py: error opening ctm file {0} for input".format( args.ctm_in)) @@ -138,7 +138,7 @@ def OpenFiles(): print("get_ctm_edits.py: error: if you set the the --symbol-table option " "you must also set the --oov option", file = sys.stderr) try: - f = open(args.symbol_table, 'r') + f = open(args.symbol_table, 'r', encoding='utf-8') for line in f.readlines(): [ word, integer ] = line.split() if int(integer) == args.oov: diff --git a/egs/wsj/s5/steps/cleanup/internal/get_non_scored_words.py b/egs/wsj/s5/steps/cleanup/internal/get_non_scored_words.py index aa71fa47d84..69e0242eafb 100755 --- a/egs/wsj/s5/steps/cleanup/internal/get_non_scored_words.py +++ b/egs/wsj/s5/steps/cleanup/internal/get_non_scored_words.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python 
+#!/usr/bin/env python3 # Copyright 2016 Vimal Manohar # 2016 Johns Hopkins University (author: Daniel Povey) @@ -90,7 +90,7 @@ def read_lang(lang_dir): raise try: - for line in open(lang_dir + '/words.txt').readlines(): + for line in open(lang_dir + '/words.txt', encoding='utf-8').readlines(): [ word, integer ] = line.split() if int(integer) in silence_word_ints: non_scored_words.add(word) diff --git a/egs/wsj/s5/steps/cleanup/internal/get_pron_stats.py b/egs/wsj/s5/steps/cleanup/internal/get_pron_stats.py index a33ba85d9fa..3ea217b6589 100755 --- a/egs/wsj/s5/steps/cleanup/internal/get_pron_stats.py +++ b/egs/wsj/s5/steps/cleanup/internal/get_pron_stats.py @@ -75,14 +75,14 @@ def ReadEntries(file_handle): # Each entry in the list represents the pronounciation candidate(s) of a word. # For each non- word, the entry is a list: [utt_id, word, set(pronunciation_candidates)]. e.g: # [911Mothers_2010W-0010916-0012901-1, other, set('AH DH ER', 'AH DH ER K AH N')] -# For each , we split the phones it aligns to into two parts: "nonsil_left", +# For each , we split the phones it aligns to into two parts: "nonsil_left", # which includes phones before the first silphone, and "nonsil_right", which includes -# phones after the last silphone. For example, for : 'V SIL B AH SIL', +# phones after the last silphone. For example, for : 'V SIL B AH SIL', # nonsil_left is 'V' and nonsil_right is empty ''. After processing an entry # in ctm_prons, we put it in "info" as an entry: [utt_id, word, nonsil_right] # only if it's nonsil_right segment is not empty, which may be used when processing # the next word. -# +# # Normally, one non- word is only aligned to one pronounciation candidate. However # when there is a preceding/following , like in the following example, we # assume the phones aligned to should be statistically distributed @@ -90,7 +90,7 @@ def ReadEntries(file_handle): # Thus we append the "nonsil_left" segment of these phones to the pronounciation # of the preceding word, if the last phone of this pronounciation is not a silence phone, # Similarly we can add a pron candidate to the following word. -# +# # For example, for the following part of a ctm_prons file: # 911Mothers_2010W-0010916-0012901-1 other AH DH ER # 911Mothers_2010W-0010916-0012901-1 K AH N SIL B @@ -99,11 +99,11 @@ def ReadEntries(file_handle): # 911Mothers_2010W-0010916-0012901-1 when W EH N # 911Mothers_2010W-0010916-0012901-1 people P IY P AH L # 911Mothers_2010W-0010916-0012901-1 SIL -# 911Mothers_2010W-0010916-0012901-1 heard HH ER +# 911Mothers_2010W-0010916-0012901-1 heard HH ER # 911Mothers_2010W-0010916-0012901-1 D # 911Mothers_2010W-0010916-0012901-1 that SIL DH AH T # 911Mothers_2010W-0010916-0012901-1 my M AY -# +# # The corresponding segment in the "info" list is: # [911Mothers_2010W-0010916-0012901-1, other, set('AH DH ER', 'AH DH ER K AH N')] # [911Mothers_2010W-0010916-0012901-1, , 'B' @@ -113,7 +113,7 @@ def ReadEntries(file_handle): # [911Mothers_2010W-0010916-0012901-1, , 'D'] # [911Mothers_2010W-0010916-0012901-1, that, set('SIL DH AH T')] # [911Mothers_2010W-0010916-0012901-1, my, set('M AY')] -# +# # Then we accumulate pronouciation stats from "info". Basically, for each occurence # of a word, each pronounciation candidate gets equal soft counts. e.g. In the above # example, each pron candidate of "because" gets a count of 1/4. The stats is stored @@ -139,20 +139,20 @@ def GetStatsFromCtmProns(silphones, optional_silence, non_scored_words, ctm_pron # So we apply the same merging method in these cases. 
if word == '' or (word in non_scored_words and word != '' and word != ''): nonsil_left = [] - nonsil_right = [] + nonsil_right = [] for phone in phones: if phone in silphones: break nonsil_left.append(phone) - + for phone in reversed(phones): if phone in silphones: break nonsil_right.insert(0, phone) - + # info[-1][0] is the utt_id of the last entry - if len(nonsil_left) > 0 and len(info) > 0 and utt == info[-1][0]: - # pron_ext is a set of extended pron candidates. + if len(nonsil_left) > 0 and len(info) > 0 and utt == info[-1][0]: + # pron_ext is a set of extended pron candidates. pron_ext = set() # info[-1][2] is the set of pron candidates of the last entry. for pron in info[-1][2]: @@ -211,7 +211,7 @@ def GetStatsFromCtmProns(silphones, optional_silence, non_scored_words, ctm_pron stats[(word, phones)] = stats.get((word, phones), 0) + count return stats -def WriteStats(stats, file_handle): +def WriteStats(stats, file_handle): for word_pron, count in stats.items(): print('{0} {1} {2}'.format(count, word_pron[0], word_pron[1]), file=file_handle) file_handle.close() @@ -222,7 +222,7 @@ def Main(): non_scored_words = ReadEntries(args.non_scored_words_file_handle) optional_silence = ReadEntries(args.optional_silence_file_handle) stats = GetStatsFromCtmProns(silphones, optional_silence.pop(), non_scored_words, args.ctm_prons_file_handle) - WriteStats(stats, args.stats_file_handle) + WriteStats(stats, args.stats_file_handle) if __name__ == "__main__": Main() diff --git a/egs/wsj/s5/steps/cleanup/internal/make_one_biased_lm.py b/egs/wsj/s5/steps/cleanup/internal/make_one_biased_lm.py index e41a67705e9..68055729fd9 100755 --- a/egs/wsj/s5/steps/cleanup/internal/make_one_biased_lm.py +++ b/egs/wsj/s5/steps/cleanup/internal/make_one_biased_lm.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Copyright 2016 Johns Hopkins University (Author: Daniel Povey) # Apache 2.0. @@ -142,16 +142,18 @@ def CompletelyDiscountLowCountStates(self, min_count): hist_to_total_count = self.GetHistToTotalCount() for n in reversed(list(range(2, self.ngram_order))): this_order_counts = self.counts[n] + to_delete = [] for hist in this_order_counts.keys(): if hist_to_total_count[hist] < min_count: # we need to completely back off this count. word_to_count = this_order_counts[hist] - del this_order_counts[hist] # delete the key from the dict. + # mark this key for deleting + to_delete.append(hist) backoff_hist = hist[1:] # this will be a tuple not a list. for word, count in word_to_count.items(): self.AddCount(backoff_hist, word, count) - - + for hist in to_delete: + del this_order_counts[hist] # This backs off the counts according to Kneser-Ney (unmodified, # with interpolation). 
@@ -200,7 +202,7 @@ def AddTopWords(self, top_words_file): word_to_count = self.counts[0][empty_history] total = sum(word_to_count.values()) try: - f = open(top_words_file) + f = open(top_words_file, mode='r', encoding='utf-8') except: sys.exit("make_one_biased_lm.py: error opening top-words file: " "--top-words=" + top_words_file) diff --git a/egs/wsj/s5/steps/cleanup/internal/modify_ctm_edits.py b/egs/wsj/s5/steps/cleanup/internal/modify_ctm_edits.py index d6f0d0f6b23..af63ca27d2b 100755 --- a/egs/wsj/s5/steps/cleanup/internal/modify_ctm_edits.py +++ b/egs/wsj/s5/steps/cleanup/internal/modify_ctm_edits.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Copyright 2016 Vimal Manohar # 2016 Johns Hopkins University (author: Daniel Povey) @@ -105,7 +105,7 @@ def ReadNonScoredWords(non_scored_words_file): global non_scored_words try: - f = open(non_scored_words_file) + f = open(non_scored_words_file, encoding='utf-8') except: sys.exit("modify_ctm_edits.py: error opening file: " "--non-scored-words=" + non_scored_words_file) @@ -317,12 +317,12 @@ def ProcessUtterance(split_lines_of_utt): def ProcessData(): try: - f_in = open(args.ctm_edits_in) + f_in = open(args.ctm_edits_in, encoding='utf-8') except: sys.exit("modify_ctm_edits.py: error opening ctm-edits input " "file {0}".format(args.ctm_edits_in)) try: - f_out = open(args.ctm_edits_out, 'w') + f_out = open(args.ctm_edits_out, 'w', encoding='utf-8') except: sys.exit("modify_ctm_edits.py: error opening ctm-edits output " "file {0}".format(args.ctm_edits_out)) diff --git a/egs/wsj/s5/steps/cleanup/internal/segment_ctm_edits.py b/egs/wsj/s5/steps/cleanup/internal/segment_ctm_edits.py index 39d6cb6ed80..e571fefb84c 100755 --- a/egs/wsj/s5/steps/cleanup/internal/segment_ctm_edits.py +++ b/egs/wsj/s5/steps/cleanup/internal/segment_ctm_edits.py @@ -1,4 +1,5 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 + # Copyright 2016 Vimal Manohar # 2016 Johns Hopkins University (author: Daniel Povey) @@ -894,7 +895,7 @@ def AccWordStatsForUtterance(split_lines_of_utt, def PrintWordStats(word_stats_out): try: - f = open(word_stats_out, 'w') + f = open(word_stats_out, 'w', encoding='utf-8') except: sys.exit("segment_ctm_edits.py: error opening word-stats file --word-stats-out={0} " "for writing".format(word_stats_out)) @@ -924,23 +925,23 @@ def PrintWordStats(word_stats_out): def ProcessData(): try: - f_in = open(args.ctm_edits_in) + f_in = open(args.ctm_edits_in, encoding='utf-8') except: sys.exit("segment_ctm_edits.py: error opening ctm-edits input " "file {0}".format(args.ctm_edits_in)) try: - text_output_handle = open(args.text_out, 'w') + text_output_handle = open(args.text_out, 'w', encoding='utf-8') except: sys.exit("segment_ctm_edits.py: error opening text output " "file {0}".format(args.text_out)) try: - segments_output_handle = open(args.segments_out, 'w') + segments_output_handle = open(args.segments_out, 'w', encoding='utf-8') except: sys.exit("segment_ctm_edits.py: error opening segments output " "file {0}".format(args.text_out)) if args.ctm_edits_out != None: try: - ctm_edits_output_handle = open(args.ctm_edits_out, 'w') + ctm_edits_output_handle = open(args.ctm_edits_out, 'w', encoding='utf-8') except: sys.exit("segment_ctm_edits.py: error opening ctm-edits output " "file {0}".format(args.ctm_edits_out)) @@ -994,7 +995,7 @@ def ProcessData(): def ReadNonScoredWords(non_scored_words_file): global non_scored_words try: - f = open(non_scored_words_file) + f = open(non_scored_words_file, encoding='utf-8') except: 
sys.exit("segment_ctm_edits.py: error opening file: " "--non-scored-words=" + non_scored_words_file) @@ -1015,7 +1016,7 @@ def ReadNonScoredWords(non_scored_words_file): oov_symbol = None if args.oov_symbol_file != None: try: - with open(args.oov_symbol_file) as f: + with open(args.oov_symbol_file, encoding='utf-8') as f: line = f.readline() assert len(line.split()) == 1 oov_symbol = line.split()[0] diff --git a/egs/wsj/s5/steps/cleanup/make_biased_lms.py b/egs/wsj/s5/steps/cleanup/make_biased_lms.py index ab508eedc9c..4b1fd320221 100755 --- a/egs/wsj/s5/steps/cleanup/make_biased_lms.py +++ b/egs/wsj/s5/steps/cleanup/make_biased_lms.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 from __future__ import print_function import sys @@ -55,7 +55,7 @@ def ProcessGroupOfLines(group_of_lines): try: command = "steps/cleanup/internal/make_one_biased_lm.py " + args.lm_opts p = subprocess.Popen(command, shell = True, stdin = subprocess.PIPE, - stdout = sys.stdout, stderr = sys.stderr) + stdout = sys.stdout, stderr = sys.stderr) for line in group_of_lines: a = line.split() if len(a) == 0: @@ -63,13 +63,15 @@ def ProcessGroupOfLines(group_of_lines): utterance_id = a[0] # print to utterance-map file print(utterance_id, group_utterance_id, file = utterance_map_file) - rest_of_line = ' '.join(a[1:]) # get rid of utterance id. - print(rest_of_line, file=p.stdin) + rest_of_line = ' '.join(a[1:]) + '\n' # get rid of utterance id. + p.stdin.write(rest_of_line.encode('utf-8')) p.stdin.close() assert p.wait() == 0 - except Exception as e: - sys.exit("make_biased_lms.py: error calling subprocess, command was: " + - command + ", error was : " + str(e)) + except Exception: + sys.stderr.write( + "make_biased_lms.py: error calling subprocess, command was: " + + command) + raise # Print a blank line; this terminates the FST in the Kaldi fst-archive # format. print("") From c6b05d18597612170148a3ad7b313dc192d62de4 Mon Sep 17 00:00:00 2001 From: saikiranvalluri Date: Sat, 2 Mar 2019 06:02:57 +0000 Subject: [PATCH 045/235] G2P training stage added into Spanish gigaword recipe --- egs/fisher_callhome_spanish/s5_gigaword/run.sh | 8 ++++++++ tools/extras/install_g2p_seq2seq.sh | 5 +++++ tools/install_g2p_seq2seq.sh | 1 + 3 files changed, 14 insertions(+) create mode 100644 tools/extras/install_g2p_seq2seq.sh create mode 120000 tools/install_g2p_seq2seq.sh diff --git a/egs/fisher_callhome_spanish/s5_gigaword/run.sh b/egs/fisher_callhome_spanish/s5_gigaword/run.sh index bd553fc720e..7e488cdc5fa 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/run.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/run.sh @@ -39,6 +39,9 @@ if [ $stage -le -1 ]; then # ES gigaword corpus to bring the total to 64k words. The ES frequency sorted # wordlist is downloaded if it is not available. local/fsp_prepare_dict.sh $spanish_lexicon + ( + steps/dict/train_g2p_seq2seq.sh data/local/dict/lexicon.txt exp/g2p || touch exp/g2p/.error + ) & # Added c,j, v to the non silences phones manually utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang @@ -74,6 +77,11 @@ if [ $stage -le -1 ]; then local/create_splits.sh $split local/callhome_create_splits.sh $split_callhome + wait # wait till G2P training finishes + if [ -f exp/g2p/.error ]; then + rm exp/g2p/.error || true + echo "Fail to train the G2P model." 
&& exit 1; + fi fi if [ $stage -le 0 ]; then diff --git a/tools/extras/install_g2p_seq2seq.sh b/tools/extras/install_g2p_seq2seq.sh new file mode 100644 index 00000000000..c9979b8b961 --- /dev/null +++ b/tools/extras/install_g2p_seq2seq.sh @@ -0,0 +1,5 @@ +if [ ! -e g2p-seq2seq ];then + git clone https://github.com/cmusphinx/g2p-seq2seq.git + cd g2p-seq2seq/ + python setup.py install +fi diff --git a/tools/install_g2p_seq2seq.sh b/tools/install_g2p_seq2seq.sh new file mode 120000 index 00000000000..77715305f74 --- /dev/null +++ b/tools/install_g2p_seq2seq.sh @@ -0,0 +1 @@ +extras/install_g2p_seq2seq.sh \ No newline at end of file From 8c226cc9b0995c9a656a20484587d46ed28e5fee Mon Sep 17 00:00:00 2001 From: saikiranvalluri Date: Sat, 2 Mar 2019 06:06:28 +0000 Subject: [PATCH 046/235] G2P seq2seq scripts added in steps/ --- egs/wsj/s5/steps/dict/apply_g2p_seq2seq.sh | 42 ++++++++++++++++++++++ egs/wsj/s5/steps/dict/train_g2p_seq2seq.sh | 39 ++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 egs/wsj/s5/steps/dict/apply_g2p_seq2seq.sh create mode 100644 egs/wsj/s5/steps/dict/train_g2p_seq2seq.sh diff --git a/egs/wsj/s5/steps/dict/apply_g2p_seq2seq.sh b/egs/wsj/s5/steps/dict/apply_g2p_seq2seq.sh new file mode 100644 index 00000000000..77a08c305dd --- /dev/null +++ b/egs/wsj/s5/steps/dict/apply_g2p_seq2seq.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright 2018 Govivace Inc. (Author: Valluri Saikiran) +# Apache License 2.0 + +# This script applies a g2p model using CMUsphinx/seq2seq. + +stage=0 +encoding='utf-8' + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. utils/parse_options.sh || exit 1; + +set -u +set -e + +if [ $# != 3 ]; then + echo "Usage: $0 [options] " + echo " where is the training lexicon (one pronunciation per " + echo " word per line, with lines like 'hello h uh l ow') and" + echo " is directory where the models will be stored" + exit 1; +fi + +lexicon=$1 +wdir=$2 +outdir=$3 + +mkdir -p $outdir + +[ ! -f $lexicon ] && echo "Cannot find $lexicon" && exit + +if [ ! -s `which g2p-seq2seq` ] ; then + echo "g2p-seq2seq was not found !" + echo "Go to $KALDI_ROOT/tools and execute extras/install_g2p_seq2seq.sh" + exit 1 +fi + +g2p-seq2seq --decode $lexicon --model_dir $wdir --output $outdir/lexicon.lex + diff --git a/egs/wsj/s5/steps/dict/train_g2p_seq2seq.sh b/egs/wsj/s5/steps/dict/train_g2p_seq2seq.sh new file mode 100644 index 00000000000..e0389171fd5 --- /dev/null +++ b/egs/wsj/s5/steps/dict/train_g2p_seq2seq.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# Copyright 2018 Govivace Inc. (Author: Valluri Saikiran) +# Apache License 2.0 + +# This script trains a g2p model using CMUsphinx/seq2seq. + +stage=0 +encoding='utf-8' + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. utils/parse_options.sh || exit 1; + +set -u +set -e + +if [ $# != 2 ]; then + echo "Usage: $0 [options] " + echo " where is the training lexicon (one pronunciation per " + echo " word per line, with lines like 'hello h uh l ow') and" + echo " is directory where the models will be stored" + exit 1; +fi + +lexicon=$1 +wdir=$2 + +[ ! -f $lexicon ] && echo "Cannot find $lexicon" && exit + +if [ ! -s `which g2p-seq2seq` ]; then + echo "g2p-seq2seq was not found !" 
+ echo "Go to $KALDI_ROOT/tools and execute extras/install_g2p_seq2seq.sh" + exit 1 +fi + +g2p-seq2seq --max_epochs 12 --train $lexicon --model_dir $wdir + From 7b67fc2ade32fa7449a3a228903c920f499a2c3c Mon Sep 17 00:00:00 2001 From: saikiranvalluri Date: Sat, 2 Mar 2019 12:09:40 +0000 Subject: [PATCH 047/235] RNNLM scripts updated to UTF8 encoding --- scripts/rnnlm/choose_features.py | 12 +++++++++--- scripts/rnnlm/get_best_model.py | 24 +++++++++++++----------- scripts/rnnlm/get_embedding_dim.py | 2 +- scripts/rnnlm/get_num_splits.sh | 2 +- scripts/rnnlm/get_special_symbol_opts.py | 8 ++++++-- scripts/rnnlm/get_unigram_probs.py | 18 +++++++++++------- scripts/rnnlm/get_vocab.py | 7 +++++-- scripts/rnnlm/get_word_features.py | 15 +++++++++------ scripts/rnnlm/lmrescore.sh | 6 ++++++ scripts/rnnlm/lmrescore_nbest.sh | 4 ++-- scripts/rnnlm/lmrescore_pruned.sh | 17 +++++++++++++---- scripts/rnnlm/prepare_rnnlm_dir.sh | 9 +++++++-- scripts/rnnlm/prepare_split_data.py | 5 ++++- scripts/rnnlm/show_word_features.py | 13 +++++++++++-- scripts/rnnlm/train_rnnlm.sh | 2 +- scripts/rnnlm/validate_features.py | 5 ++++- scripts/rnnlm/validate_text_dir.py | 7 +++++-- scripts/rnnlm/validate_word_features.py | 7 +++++-- 18 files changed, 113 insertions(+), 50 deletions(-) diff --git a/scripts/rnnlm/choose_features.py b/scripts/rnnlm/choose_features.py index 799f6b6dcc8..c6621e04494 100755 --- a/scripts/rnnlm/choose_features.py +++ b/scripts/rnnlm/choose_features.py @@ -10,6 +10,12 @@ from collections import defaultdict sys.stdout = open(1, 'w', encoding='utf-8', closefd=False) +# because this script splits inside words, we cannot use latin-1; we actually need to know what +# what the encoding is. By default we make this utf-8; to handle encodings that are not compatible +# with utf-8 (e.g. gbk), we'll eventually have to make the encoding an option to this script. + +import re +tab_or_space = re.compile('[ \t]+') parser = argparse.ArgumentParser(description="This script chooses the sparse feature representation of words. " "To be more specific, it chooses the set of features-- you compute " @@ -84,9 +90,9 @@ # and 'wordlist' is a list indexed by integer id, that returns the string-valued word. def read_vocab(vocab_file): vocab = {} - with open(vocab_file, 'r', encoding="utf-8", errors='replace') as f: + with open(vocab_file, 'r', encoding="utf-8") as f: for line in f: - fields = line.split() + fields = re.split(tab_or_space, line) assert len(fields) == 2 if fields[0] in vocab: sys.exit(sys.argv[0] + ": duplicated word({0}) in vocab: {1}" @@ -115,7 +121,7 @@ def read_unigram_probs(unigram_probs_file): unigram_probs = [] with open(unigram_probs_file, 'r', encoding="utf-8") as f: for line in f: - fields = line.split() + fields = re.split(tab_or_space, line) assert len(fields) == 2 idx = int(fields[0]) if idx >= len(unigram_probs): diff --git a/scripts/rnnlm/get_best_model.py b/scripts/rnnlm/get_best_model.py index 45487b18b0c..ed266346e06 100755 --- a/scripts/rnnlm/get_best_model.py +++ b/scripts/rnnlm/get_best_model.py @@ -3,14 +3,14 @@ # Copyright 2017 Johns Hopkins University (author: Daniel Povey) # License: Apache 2.0. -import os import argparse -import sys +import glob import re +import sys parser = argparse.ArgumentParser(description="Works out the best iteration of RNNLM training " - "based on dev-set perplexity, and prints the number corresponding " - "to that iteration", + "based on dev-set perplexity, and prints the number corresponding " + "to that iteration", epilog="E.g. 
" + sys.argv[0] + " exp/rnnlm_a", formatter_class=argparse.ArgumentDefaultsHelpFormatter) @@ -19,8 +19,7 @@ args = parser.parse_args() - -num_iters=None +num_iters = None try: with open(args.rnnlm_dir + "/info.txt", encoding="utf-8") as f: for line in f: @@ -36,15 +35,15 @@ sys.exit(sys.argv[0] + ": could not get num_iters from {0}/info.txt".format( args.rnnlm_dir)) -best_objf=-2000 -best_iter=-1 -for i in range(num_iters): +best_objf = -2000 +best_iter = -1 +for i in range(1, num_iters): this_logfile = "{0}/log/compute_prob.{1}.log".format(args.rnnlm_dir, i) try: f = open(this_logfile, 'r', encoding='utf-8') except: sys.exit(sys.argv[0] + ": could not open log-file {0}".format(this_logfile)) - this_objf=-1000 + this_objf = -1000 for line in f: m = re.search('Overall objf .* (\S+)$', str(line)) if m is not None: @@ -53,6 +52,10 @@ except Exception as e: sys.exit(sys.argv[0] + ": line in file {0} could not be parsed: {1}, error is: {2}".format( this_logfile, line, str(e))) + # verify this iteration still has model files present + if len(glob.glob("{0}/{1}.raw".format(args.rnnlm_dir, i))) == 0: + # this iteration has log files, but model files have been cleaned up, skip it + continue if this_objf == -1000: print(sys.argv[0] + ": warning: could not parse objective function from {0}".format( this_logfile), file=sys.stderr) @@ -63,5 +66,4 @@ if best_iter == -1: sys.exit(sys.argv[0] + ": error: could not get best iteration.") - print(str(best_iter)) diff --git a/scripts/rnnlm/get_embedding_dim.py b/scripts/rnnlm/get_embedding_dim.py index b6810ef2cbf..1d516e0edf5 100755 --- a/scripts/rnnlm/get_embedding_dim.py +++ b/scripts/rnnlm/get_embedding_dim.py @@ -101,4 +101,4 @@ "nnet '{0}': {1} != {2}".format( args.nnet, input_dim, output_dim)) -print(str(input_dim)) +print('{}'.format(input_dim)) diff --git a/scripts/rnnlm/get_num_splits.sh b/scripts/rnnlm/get_num_splits.sh index 93d1f7f169c..974fd8bf204 100755 --- a/scripts/rnnlm/get_num_splits.sh +++ b/scripts/rnnlm/get_num_splits.sh @@ -65,7 +65,7 @@ tot_with_multiplicities=0 for f in $text/*.counts; do if [ "$f" != "$text/dev.counts" ]; then - this_tot=$(cat $f | awk '{tot += $2} END{print tot}') + this_tot=$(cat $f | awk '{tot += $2} END{printf("%d", tot)}') if ! 
[ $this_tot -gt 0 ]; then echo "$0: there were no counts in counts file $f" 1>&2 exit 1 diff --git a/scripts/rnnlm/get_special_symbol_opts.py b/scripts/rnnlm/get_special_symbol_opts.py index 13fe497faf9..0cf8e10feca 100755 --- a/scripts/rnnlm/get_special_symbol_opts.py +++ b/scripts/rnnlm/get_special_symbol_opts.py @@ -8,6 +8,9 @@ import argparse import sys +import re +tab_or_space = re.compile('[ \t]+') + parser = argparse.ArgumentParser(description="This script checks whether the special symbols " "appear in words.txt with expected values, if not, it will " "print out the options with correct value to stdout, which may look like " @@ -25,9 +28,10 @@ lower_ids = {} upper_ids = {} -input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8', errors='replace') +input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') for line in input_stream: - fields = line.split() + fields = re.split(tab_or_space, line) + assert(len(fields) == 2) sym = fields[0] if sym in special_symbols: assert sym not in lower_ids diff --git a/scripts/rnnlm/get_unigram_probs.py b/scripts/rnnlm/get_unigram_probs.py index 32b01728ca3..d115b6f54bf 100755 --- a/scripts/rnnlm/get_unigram_probs.py +++ b/scripts/rnnlm/get_unigram_probs.py @@ -7,6 +7,9 @@ import argparse import sys +import re +tab_or_space = re.compile('[ \t]+') + parser = argparse.ArgumentParser(description="This script gets the unigram probabilities of words.", epilog="E.g. " + sys.argv[0] + " --vocab-file=data/rnnlm/vocab/words.txt " "--data-weights-file=exp/rnnlm/data_weights.txt data/rnnlm/data " @@ -74,10 +77,10 @@ def get_all_data_sources_except_dev(text_dir): # value is a tuple (repeated_times_per_epoch, weight) def read_data_weights(weights_file, data_sources): data_weights = {} - with open(weights_file, 'r', encoding="utf-8", errors='replace') as f: + with open(weights_file, 'r', encoding="utf-8") as f: for line in f: try: - fields = line.split() + fields = re.split(tab_or_space, line) assert len(fields) == 3 if fields[0] in data_weights: raise Exception("duplicated data source({0}) specified in " @@ -99,9 +102,9 @@ def read_data_weights(weights_file, data_sources): # return the vocab, which is a dict mapping the word to a integer id. def read_vocab(vocab_file): vocab = {} - with open(vocab_file, 'r', encoding="utf-8", errors='replace') as f: + with open(vocab_file, 'r', encoding="utf-8") as f: for line in f: - fields = line.split() + fields = re.split(tab_or_space, line) assert len(fields) == 2 if fields[0] in vocab: sys.exit(sys.argv[0] + ": duplicated word({0}) in vocab: {1}" @@ -128,10 +131,11 @@ def get_counts(data_sources, data_weights, vocab): if weight == 0.0: continue - with open(counts_file, 'r', encoding="utf-8", errors='replace') as f: + with open(counts_file, 'r', encoding="utf-8") as f: for line in f: - fields = line.split() - assert len(fields) == 2 + fields = re.split(tab_or_space, line) + if len(fields) != 2: print("Warning, should be 2 cols:", fields, line, file=sys.stderr); + assert(len(fields) == 2) word = fields[0] count = fields[1] if word not in vocab: diff --git a/scripts/rnnlm/get_vocab.py b/scripts/rnnlm/get_vocab.py index f290ef721c1..d65f8e3669b 100755 --- a/scripts/rnnlm/get_vocab.py +++ b/scripts/rnnlm/get_vocab.py @@ -8,6 +8,9 @@ import sys sys.stdout = open(1, 'w', encoding='utf-8', closefd=False) +import re +tab_or_space = re.compile('[ \t]+') + parser = argparse.ArgumentParser(description="This script get a vocab from unigram counts " "of words produced by get_unigram_counts.sh", epilog="E.g. 
" + sys.argv[0] + " data/rnnlm/data > data/rnnlm/vocab/words.txt", @@ -27,8 +30,8 @@ def add_counts(word_counts, counts_file): with open(counts_file, 'r', encoding="utf-8") as f: for line in f: - line = line.strip() - word_and_count = line.split() + line = line.strip(" \t\r\n") + word_and_count = re.split(tab_or_space, line) assert len(word_and_count) == 2 if word_and_count[0] in word_counts: word_counts[word_and_count[0]] += int(word_and_count[1]) diff --git a/scripts/rnnlm/get_word_features.py b/scripts/rnnlm/get_word_features.py index 8bdb553b9c8..7555b774b83 100755 --- a/scripts/rnnlm/get_word_features.py +++ b/scripts/rnnlm/get_word_features.py @@ -9,6 +9,9 @@ import math from collections import defaultdict +import re +tab_or_space = re.compile('[ \t]+') + parser = argparse.ArgumentParser(description="This script turns the words into the sparse feature representation, " "using features from rnnlm/choose_features.py.", epilog="E.g. " + sys.argv[0] + " --unigram-probs=exp/rnnlm/unigram_probs.txt " @@ -38,9 +41,9 @@ # return the vocab, which is a dict mapping the word to a integer id. def read_vocab(vocab_file): vocab = {} - with open(vocab_file, 'r', encoding="utf-8", errors='replace') as f: + with open(vocab_file, 'r', encoding="utf-8") as f: for line in f: - fields = line.split() + fields = re.split(tab_or_space, line) assert len(fields) == 2 if fields[0] in vocab: sys.exit(sys.argv[0] + ": duplicated word({0}) in vocab: {1}" @@ -59,9 +62,9 @@ def read_vocab(vocab_file): # return a list of unigram_probs, indexed by word id def read_unigram_probs(unigram_probs_file): unigram_probs = [] - with open(unigram_probs_file, 'r', encoding="utf-8", errors='replace') as f: + with open(unigram_probs_file, 'r', encoding="utf-8") as f: for line in f: - fields = line.split() + fields = re.split(tab_or_space, line) assert len(fields) == 2 idx = int(fields[0]) if idx >= len(unigram_probs): @@ -100,9 +103,9 @@ def read_features(features_file): feats['min_ngram_order'] = 10000 feats['max_ngram_order'] = -1 - with open(features_file, 'r', encoding="utf-8", errors='replace') as f: + with open(features_file, 'r', encoding="utf-8") as f: for line in f: - fields = line.split() + fields = re.split(tab_or_space, line) assert(len(fields) in [3, 4, 5]) feat_id = int(fields[0]) diff --git a/scripts/rnnlm/lmrescore.sh b/scripts/rnnlm/lmrescore.sh index cd0cf793d8d..9da22ae75a2 100755 --- a/scripts/rnnlm/lmrescore.sh +++ b/scripts/rnnlm/lmrescore.sh @@ -72,6 +72,12 @@ awk -v n=$0 -v w=$weight 'BEGIN {if (w < 0 || w > 1) { print n": Interpolation weight should be in the range of [0, 1]"; exit 1;}}' \ || exit 1; +if ! head -n -1 $rnnlm_dir/config/words.txt | cmp $oldlang/words.txt -; then + # the last word of the RNNLM word list is an added word + echo "$0: Word lists mismatch for lattices and RNNLM." + exit 1 +fi + oldlm_command="fstproject --project_output=true $oldlm |" special_symbol_opts=$(cat $rnnlm_dir/special_symbol_opts.txt) diff --git a/scripts/rnnlm/lmrescore_nbest.sh b/scripts/rnnlm/lmrescore_nbest.sh index f50a3c909f0..58b19b9fa79 100755 --- a/scripts/rnnlm/lmrescore_nbest.sh +++ b/scripts/rnnlm/lmrescore_nbest.sh @@ -29,7 +29,7 @@ if [ $# != 6 ]; then echo "This version applies an RNNLM and mixes it with the LM scores" echo "previously in the lattices., controlled by the first parameter (rnnlm-weight)" echo "" - echo "Usage: utils/rnnlmrescore.sh " + echo "Usage: $0 [options] " echo "Main options:" echo " --inv-acwt # default 12. e.g. --inv-acwt 17. Equivalent to LM scale to use." 
echo " # for N-best list generation... note, we'll score at different acwt's" @@ -177,7 +177,7 @@ fi if [ $stage -le 6 ]; then echo "$0: invoking rnnlm/compute_sentence_scores.sh which calls rnnlm to get RNN LM scores." $cmd JOB=1:$nj $dir/log/rnnlm_compute_scores.JOB.log \ - local/rnnlm/compute_sentence_scores.sh $rnndir $adir.JOB/temp \ + rnnlm/compute_sentence_scores.sh $rnndir $adir.JOB/temp \ $adir.JOB/words_text $adir.JOB/lmwt.rnn fi if [ $stage -le 7 ]; then diff --git a/scripts/rnnlm/lmrescore_pruned.sh b/scripts/rnnlm/lmrescore_pruned.sh index 46ee5846424..9ba78415708 100755 --- a/scripts/rnnlm/lmrescore_pruned.sh +++ b/scripts/rnnlm/lmrescore_pruned.sh @@ -16,16 +16,18 @@ max_ngram_order=4 # Approximate the lattice-rescoring by limiting the max-ngram- # the same ngram history and this prevents the lattice from # exploding exponentially. Details of the n-gram approximation # method are described in section 2.3 of the paper - # http://www.danielpovey.com/files/2018_icassp_lattice_pruning.pdm -max_arcs=499 # limit the max arcs in lattice while rescoring. E.g., 20000 + # http://www.danielpovey.com/files/2018_icassp_lattice_pruning.pdf +max_arcs= # limit the max arcs in lattice while rescoring. E.g., 20000 -acwt=1 -weight=1 # Interpolation weight for RNNLM. +acwt=0.1 +weight=0.5 # Interpolation weight for RNNLM. normalize=false # If true, we add a normalization step to the output of the RNNLM # so that it adds up to *exactly* 1. Note that this is not necessary # as in our RNNLM setup, a properly trained network would automatically # have its normalization term close to 1. The details of this # could be found at http://www.danielpovey.com/files/2018_icassp_rnnlm.pdf +lattice_prune_beam=4 # Beam used in pruned lattice composition + # This option affects speed and how large the composed lattice may be # End configuration section. @@ -73,6 +75,12 @@ awk -v n=$0 -v w=$weight 'BEGIN {if (w < 0 || w > 1) { print n": Interpolation weight should be in the range of [0, 1]"; exit 1;}}' \ || exit 1; +if ! head -n -1 $rnnlm_dir/config/words.txt | cmp $oldlang/words.txt -; then + # the last word of the RNNLM word list is an added word + echo "$0: Word lists mismatch for lattices and RNNLM." + exit 1 +fi + normalize_opt= if $normalize; then normalize_opt="--normalize-probs=true" @@ -97,6 +105,7 @@ cp $indir/num_jobs $outdir $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \ lattice-lmrescore-kaldi-rnnlm-pruned --lm-scale=$weight $special_symbol_opts \ + --lattice-compose-beam=$lattice_prune_beam \ --acoustic-scale=$acwt --max-ngram-order=$max_ngram_order $normalize_opt $max_arcs_opt \ $carpa_option $oldlm $word_embedding "$rnnlm_dir/final.raw" \ "ark:gunzip -c $indir/lat.JOB.gz|" "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1; diff --git a/scripts/rnnlm/prepare_rnnlm_dir.sh b/scripts/rnnlm/prepare_rnnlm_dir.sh index d3ee44f1f95..e101822d983 100755 --- a/scripts/rnnlm/prepare_rnnlm_dir.sh +++ b/scripts/rnnlm/prepare_rnnlm_dir.sh @@ -23,7 +23,7 @@ if [ $# != 3 ]; then echo "Usage: $0 [options] " echo "Sets up the directory for RNNLM training as done by" echo "rnnlm/train_rnnlm.sh, and initializes the model." - echo " is as validated by rnnlm/validate_data_dir.py" + echo " is as validated by rnnlm/validate_text_dir.py" echo " is as validated by rnnlm/validate_config_dir.sh." exit 1 fi @@ -34,6 +34,7 @@ config_dir=$2 dir=$3 set -e +. 
./path.sh if [ $stage -le 0 ]; then echo "$0: validating input" @@ -52,9 +53,13 @@ if [ $stage -le 1 ]; then echo "$0: copying config directory" mkdir -p $dir/config # copy expected things from $config_dir to $dir/config. - for f in words.txt features.txt data_weights.txt oov.txt xconfig; do + for f in words.txt data_weights.txt oov.txt xconfig; do cp $config_dir/$f $dir/config done + # features.txt is optional, check separately + if [ -f $config_dir/features.txt ]; then + cp $config_dir/features.txt $dir/config + fi fi rnnlm/get_special_symbol_opts.py < $dir/config/words.txt > $dir/special_symbol_opts.txt diff --git a/scripts/rnnlm/prepare_split_data.py b/scripts/rnnlm/prepare_split_data.py index 9cc4f69d09f..adcb164771d 100755 --- a/scripts/rnnlm/prepare_split_data.py +++ b/scripts/rnnlm/prepare_split_data.py @@ -8,6 +8,9 @@ import argparse import sys +import re +tab_or_space = re.compile('[ \t]+') + parser = argparse.ArgumentParser(description="This script prepares files containing integerized text, " "for consumption by nnet3-get-egs.", epilog="E.g. " + sys.argv[0] + " --vocab-file=data/rnnlm/vocab/words.txt " @@ -66,7 +69,7 @@ def read_data_weights(weights_file, data_sources): with open(weights_file, 'r', encoding="utf-8") as f: for line in f: try: - fields = line.split() + fields = re.split(tab_or_space, line) assert len(fields) == 3 if fields[0] in data_weights: raise Exception("duplicated data source({0}) specified in " diff --git a/scripts/rnnlm/show_word_features.py b/scripts/rnnlm/show_word_features.py index 89d84d53f3e..8b69fbb7d8a 100755 --- a/scripts/rnnlm/show_word_features.py +++ b/scripts/rnnlm/show_word_features.py @@ -6,8 +6,17 @@ import os import argparse import sys + +# The use of latin-1 encoding does not preclude reading utf-8. latin-1 encoding +# means "treat words as sequences of bytes", and it is compatible with utf-8 +# encoding as well as other encodings such as gbk, as long as the spaces are +# also spaces in ascii (which we check). It is basically how we emulate the +# behavior of python before python3. sys.stdout = open(1, 'w', encoding='utf-8', closefd=False) +import re +tab_or_space = re.compile('[ \t]+') + parser = argparse.ArgumentParser(description="This script turns the word features to a human readable format.", epilog="E.g. 
" + sys.argv[0] + "exp/rnnlm/word_feats.txt exp/rnnlm/features.txt " "> exp/rnnlm/word_feats.str.txt", @@ -29,7 +38,7 @@ def read_feature_type_and_key(features_file): with open(features_file, 'r', encoding="utf-8") as f: for line in f: - fields = line.split() + fields = re.split(tab_or_space, line) assert(len(fields) in [2, 3, 4]) feat_id = int(fields[0]) @@ -46,7 +55,7 @@ def read_feature_type_and_key(features_file): num_word_feats = 0 with open(args.word_features_file, 'r', encoding="utf-8") as f: for line in f: - fields = line.split() + fields = re.split(tab_or_space, line) assert len(fields) % 2 == 1 print(int(fields[0]), end='\t') diff --git a/scripts/rnnlm/train_rnnlm.sh b/scripts/rnnlm/train_rnnlm.sh index f056d096120..013e9a56c2f 100755 --- a/scripts/rnnlm/train_rnnlm.sh +++ b/scripts/rnnlm/train_rnnlm.sh @@ -41,7 +41,7 @@ use_gpu_for_diagnostics=false # set true to use GPU for compute_prob_*.log # optional cleanup options cleanup=false # add option --cleanup true to enable automatic cleanup of old models cleanup_strategy="keep_latest" # determines cleanup strategy, use either "keep_latest" or "keep_best" -cleanup_keep_iters=100 # number of iterations that will have their models retained +cleanup_keep_iters=3 # number of iterations that will have their models retained trap 'for pid in $(jobs -pr); do kill -KILL $pid; done' INT QUIT TERM . utils/parse_options.sh diff --git a/scripts/rnnlm/validate_features.py b/scripts/rnnlm/validate_features.py index a650092b086..939e634592c 100755 --- a/scripts/rnnlm/validate_features.py +++ b/scripts/rnnlm/validate_features.py @@ -7,6 +7,9 @@ import argparse import sys +import re +tab_or_space = re.compile('[ \t]+') + parser = argparse.ArgumentParser(description="Validates features file, produced by rnnlm/choose_features.py.", epilog="E.g. " + sys.argv[0] + " exp/rnnlm/features.txt", formatter_class=argparse.ArgumentDefaultsHelpFormatter) @@ -30,7 +33,7 @@ final_feats = {} word_feats = {} for line in f: - fields = line.split() + fields = re.split(tab_or_space, line) assert(len(fields) in [3, 4, 5]) assert idx == int(fields[0]) diff --git a/scripts/rnnlm/validate_text_dir.py b/scripts/rnnlm/validate_text_dir.py index d644d77911e..61914e4836a 100755 --- a/scripts/rnnlm/validate_text_dir.py +++ b/scripts/rnnlm/validate_text_dir.py @@ -7,6 +7,9 @@ import argparse import sys +import re +tab_or_space = re.compile('[ \t]+') + parser = argparse.ArgumentParser(description="Validates data directory containing text " "files from one or more data sources, including dev.txt.", epilog="E.g. 
" + sys.argv[0] + " data/rnnlm/data", @@ -51,7 +54,7 @@ def check_text_file(text_file): lineno += 1 if args.spot_check == 'true' and lineno > 10: break - words = line.split() + words = re.split(tab_or_space, line) if len(words) != 0: found_nonempty_line = True for word in words: @@ -75,7 +78,7 @@ def check_text_file(text_file): other_fields_set = set() with open(text_file, 'r', encoding="utf-8") as f: for line in f: - array = line.split() + array = re.split(tab_or_space, line) if len(array) > 0: first_word = array[0] if first_word in first_field_set or first_word in other_fields_set: diff --git a/scripts/rnnlm/validate_word_features.py b/scripts/rnnlm/validate_word_features.py index 3dc9b23aa41..303daf28bb1 100755 --- a/scripts/rnnlm/validate_word_features.py +++ b/scripts/rnnlm/validate_word_features.py @@ -7,6 +7,9 @@ import argparse import sys +import re +tab_or_space = re.compile('[ \t]+') + parser = argparse.ArgumentParser(description="Validates word features file, produced by rnnlm/get_word_features.py.", epilog="E.g. " + sys.argv[0] + " --features-file=exp/rnnlm/features.txt " "exp/rnnlm/word_feats.txt", @@ -27,7 +30,7 @@ max_feat_id = -1 with open(args.features_file, 'r', encoding="utf-8") as f: for line in f: - fields = line.split() + fields = re.split(tab_or_space, line) assert(len(fields) in [3, 4, 5]) feat_id = int(fields[0]) @@ -51,7 +54,7 @@ with open(args.word_features_file, 'r', encoding="utf-8") as f: for line in f: - fields = line.split() + fields = re.split(tab_or_space, line) assert len(fields) > 0 and len(fields) % 2 == 1 word_id = int(fields[0]) From d21be2d83f4509fc3917ae58bdf027b528545d4a Mon Sep 17 00:00:00 2001 From: Joachim Fainberg Date: Mon, 4 Mar 2019 21:11:03 +0000 Subject: [PATCH 048/235] [scripts] bug fix to nnet2->3 conversion, fixes #886 (#3071) --- .../s5/steps/nnet3/convert_nnet2_to_nnet3.py | 29 +++++++++++++++---- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/convert_nnet2_to_nnet3.py b/egs/wsj/s5/steps/nnet3/convert_nnet2_to_nnet3.py index 66ff633fbfc..edc2f7e4617 100755 --- a/egs/wsj/s5/steps/nnet3/convert_nnet2_to_nnet3.py +++ b/egs/wsj/s5/steps/nnet3/convert_nnet2_to_nnet3.py @@ -100,6 +100,7 @@ class Nnet3Model(object): def __init__(self): self.input_dim = -1 self.output_dim = -1 + self.ivector_dim = -1 self.counts = defaultdict(int) self.num_components = 0 self.components_read = 0 @@ -118,7 +119,10 @@ def add_component(self, component, pairs): Component = namedtuple("Component", "ident component pairs") if "" in pairs and self.input_dim == -1: - self.input_dim = pairs[""] + self.input_dim = int(pairs[""]) + + if "" in pairs and self.ivector_dim == -1: + self.ivector_dim = int(pairs[""]) # remove nnet2 specific tokens and catch descriptors if component == "" and "

" in pairs: @@ -159,13 +163,18 @@ def write_config(self, filename): config_string=config_string)) f.write("\n# Component nodes\n") - f.write("input-node name=input dim={0}\n".format(self.input_dim)) + if self.ivector_dim != -1: + f.write("input-node name=input dim={0}\n".format(self.input_dim-self.ivector_dim)) + f.write("input-node name=ivector dim={0}\n".format(self.ivector_dim)) + else: + f.write("input-node name=input dim={0}\n".format(self.input_dim)) previous_component = "input" for component in self.components: if component.ident == "splice": # Create splice string for the next node previous_component = make_splice_string(previous_component, - component.pairs[""]) + component.pairs[""], + component.pairs[""]) continue f.write("component-node name={name} component={name} " "input={inp}\n".format(name=component.ident, @@ -264,7 +273,7 @@ def parse_component(line, line_buffer): pairs = {} if component in SPLICE_COMPONENTS: - pairs = parse_splice_component(component, line, line_buffer) + line, pairs = parse_splice_component(component, line, line_buffer) elif component in AFFINE_COMPONENTS: pairs = parse_affine_component(component, line, line_buffer) elif component == "": @@ -335,7 +344,13 @@ def parse_splice_component(component, line, line_buffer): line = consume_token("", line) context = line.strip()[1:-1].split() - return {"" : input_dim, "" : context} + const_component_dim = 0 + line = next(line_buffer) # Context vector adds newline + line = consume_token("", line) + const_component_dim = int(line.strip().split()[0]) + + return line, {"" : input_dim, "" : context, + "" : const_component_dim} def parse_end_of_component(component, line, line_buffer): # Keeps reading until it hits the end tag for component @@ -422,7 +437,7 @@ def consume_token(token, line): return line.partition(token)[2] -def make_splice_string(nodename, context): +def make_splice_string(nodename, context, const_component_dim=0): """Generates splice string from a list of context. E.g. 
make_splice_string("renorm4", [-4, 4]) @@ -430,6 +445,8 @@ def make_splice_string(nodename, context): """ assert type(context) == list, "context argument must be a list" string = ["Offset({0}, {1})".format(nodename, i) for i in context] + if const_component_dim > 0: + string.append("ReplaceIndex(ivector, t, 0)") string = "Append(" + ", ".join(string) + ")" return string From 8fa9648c9d88c98e795bb13859ef75069b1024fa Mon Sep 17 00:00:00 2001 From: Justin Luitjens Date: Mon, 4 Mar 2019 15:54:19 -0700 Subject: [PATCH 049/235] [src] Make copies occur in per-thread default stream (for GPUs) (#3068) --- src/cudamatrix/cu-array-inl.h | 21 +++++++++++++++------ src/cudamatrix/cu-matrix.cc | 6 ++++-- src/cudamatrix/cu-packed-matrix.cc | 4 +++- src/cudamatrix/cu-vector.cc | 4 +++- 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/cudamatrix/cu-array-inl.h b/src/cudamatrix/cu-array-inl.h index 23b20501d4c..567cc0f6d18 100644 --- a/src/cudamatrix/cu-array-inl.h +++ b/src/cudamatrix/cu-array-inl.h @@ -105,8 +105,9 @@ void CuArrayBase::CopyFromVec(const std::vector &src) { if (CuDevice::Instantiate().Enabled()) { CuTimer tim; CU_SAFE_CALL( - cudaMemcpy(data_, &src.front(), src.size() * sizeof(T), - cudaMemcpyHostToDevice)); + cudaMemcpyAsync(data_, &src.front(), src.size() * sizeof(T), + cudaMemcpyHostToDevice, cudaStreamPerThread)); + CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread)); CuDevice::Instantiate().AccuProfile(__func__, tim); } else #endif @@ -122,7 +123,9 @@ void CuArray::CopyFromVec(const std::vector &src) { #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { CuTimer tim; - CU_SAFE_CALL(cudaMemcpy(this->data_, &src.front(), src.size()*sizeof(T), cudaMemcpyHostToDevice)); + CU_SAFE_CALL(cudaMemcpyAsync(this->data_, &src.front(), + src.size()*sizeof(T), cudaMemcpyHostToDevice, cudaStreamPerThread)); + CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread)); CuDevice::Instantiate().AccuProfile(__func__, tim); } else #endif @@ -179,7 +182,9 @@ void CuArrayBase::CopyToVec(std::vector *dst) const { #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { CuTimer tim; - CU_SAFE_CALL(cudaMemcpy(&dst->front(), Data(), this->dim_ * sizeof(T), cudaMemcpyDeviceToHost)); + CU_SAFE_CALL(cudaMemcpyAsync(&dst->front(), Data(), this->dim_ * sizeof(T), + cudaMemcpyDeviceToHost, cudaStreamPerThread)); + CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread)); CuDevice::Instantiate().AccuProfile("CuArray::CopyToVecD2H", tim); } else #endif @@ -196,7 +201,9 @@ void CuArrayBase::CopyToHost(T *dst) const { #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { CuTimer tim; - CU_SAFE_CALL(cudaMemcpy(dst, Data(), this->dim_ * sizeof(T), cudaMemcpyDeviceToHost)); + CU_SAFE_CALL(cudaMemcpyAsync(dst, Data(), this->dim_ * sizeof(T), + cudaMemcpyDeviceToHost, cudaStreamPerThread)); + CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread)); CuDevice::Instantiate().AccuProfile("CuArray::CopyToVecD2H", tim); } else #endif @@ -212,7 +219,9 @@ void CuArrayBase::SetZero() { #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { CuTimer tim; - CU_SAFE_CALL(cudaMemset(this->data_, 0, this->dim_ * sizeof(T))); + CU_SAFE_CALL(cudaMemsetAsync(this->data_, 0, this->dim_ * sizeof(T), + cudaStreamPerThread)); + CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread)); CuDevice::Instantiate().AccuProfile("CuArray::SetZero", tim); } else #endif diff --git a/src/cudamatrix/cu-matrix.cc b/src/cudamatrix/cu-matrix.cc index 247c2236565..1f09ff278ce 100644 --- a/src/cudamatrix/cu-matrix.cc +++ 
b/src/cudamatrix/cu-matrix.cc @@ -321,8 +321,10 @@ void CuMatrixBase::CopyFromMat(const MatrixBase &src, MatrixIndexT dst_pitch = stride_*sizeof(Real); MatrixIndexT src_pitch = src.Stride()*sizeof(Real); MatrixIndexT width = src.NumCols()*sizeof(Real); - CU_SAFE_CALL(cudaMemcpy2D(data_, dst_pitch, src.Data(), src_pitch, - width, src.NumRows(), cudaMemcpyHostToDevice)); + CU_SAFE_CALL(cudaMemcpy2DAsync(data_, dst_pitch, src.Data(), src_pitch, + width, src.NumRows(), cudaMemcpyHostToDevice, + cudaStreamPerThread)); + cudaStreamSynchronize(cudaStreamPerThread); CuDevice::Instantiate().AccuProfile("CuMatrixBase::CopyFromMat(from CPU)", tim); } else { diff --git a/src/cudamatrix/cu-packed-matrix.cc b/src/cudamatrix/cu-packed-matrix.cc index d4dbdf12143..7581b043ae0 100644 --- a/src/cudamatrix/cu-packed-matrix.cc +++ b/src/cudamatrix/cu-packed-matrix.cc @@ -248,7 +248,9 @@ void CuPackedMatrix::SetZero() { size_t nr = static_cast(num_rows_), num_bytes = ((nr * (nr+1)) / 2) * sizeof(Real); - CU_SAFE_CALL(cudaMemset(reinterpret_cast(this->data_), 0, num_bytes)); + CU_SAFE_CALL(cudaMemsetAsync(reinterpret_cast(this->data_), 0, + num_bytes, cudaStreamPerThread)); + CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread)); CuDevice::Instantiate().AccuProfile("CuPackedMatrix::SetZero", tim); } else #endif diff --git a/src/cudamatrix/cu-vector.cc b/src/cudamatrix/cu-vector.cc index 536e55d8a3b..7c968c6550d 100644 --- a/src/cudamatrix/cu-vector.cc +++ b/src/cudamatrix/cu-vector.cc @@ -1072,7 +1072,9 @@ void CuVectorBase::SetZero() { KALDI_ASSERT(dim_>=0); KALDI_ASSERT(data_!=NULL); CuTimer tim; - CU_SAFE_CALL(cudaMemset(data_, 0, dim_*sizeof(Real))); + CU_SAFE_CALL(cudaMemsetAsync(data_, 0, dim_*sizeof(Real), + cudaStreamPerThread)); + CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread)); CuDevice::Instantiate().AccuProfile("CuVector::SetZero", tim); } else #endif From bd326dc0578fc2a53590a9cb1bed4bc541c0ef01 Mon Sep 17 00:00:00 2001 From: Justin Luitjens Date: Mon, 4 Mar 2019 15:55:04 -0700 Subject: [PATCH 050/235] [src] Add GPU version of MergeTaskOutput().. relates to batch decoding (#3067) --- src/nnet3/nnet-batch-compute.cc | 34 +++++++++++++++++++++++++++++++++ src/nnet3/nnet-batch-compute.h | 3 +++ 2 files changed, 37 insertions(+) diff --git a/src/nnet3/nnet-batch-compute.cc b/src/nnet3/nnet-batch-compute.cc index 5da55d0f70d..7124afb22b1 100644 --- a/src/nnet3/nnet-batch-compute.cc +++ b/src/nnet3/nnet-batch-compute.cc @@ -863,6 +863,40 @@ void MergeTaskOutput( } KALDI_ASSERT(cur_output_frame == num_output_frames); } +void MergeTaskOutput( + const std::vector &tasks, + CuMatrix *output) { + int32 num_tasks = tasks.size(), + num_output_frames = 0, + output_dim = -1; + for (int32 i = 0; i < num_tasks; i++) { + const NnetInferenceTask &task = tasks[i]; + num_output_frames += task.num_used_output_frames; + if (i == 0) { + output_dim = (task.output_to_cpu ? 
+ task.output_cpu.NumCols() : + task.output.NumCols()); + } + } + KALDI_ASSERT(num_output_frames != 0 && output_dim != 0); + int32 cur_output_frame = 0; + output->Resize(num_output_frames, output_dim); + for (int32 i = 0; i < num_tasks; i++) { + const NnetInferenceTask &task = tasks[i]; + int32 skip = task.num_initial_unused_output_frames, + num_used = task.num_used_output_frames; + KALDI_ASSERT(cur_output_frame == task.first_used_output_frame_index); + if (task.output_to_cpu) { + output->RowRange(cur_output_frame, num_used).CopyFromMat( + task.output_cpu.RowRange(skip, num_used)); + } else { + output->RowRange(cur_output_frame, num_used).CopyFromMat( + task.output.RowRange(skip, num_used)); + } + cur_output_frame += num_used; + } + KALDI_ASSERT(cur_output_frame == num_output_frames); +} NnetBatchInference::NnetBatchInference( diff --git a/src/nnet3/nnet-batch-compute.h b/src/nnet3/nnet-batch-compute.h index 9861a28976c..bdc58e8cb4b 100644 --- a/src/nnet3/nnet-batch-compute.h +++ b/src/nnet3/nnet-batch-compute.h @@ -193,6 +193,9 @@ struct NnetBatchComputerOptions: public NnetSimpleComputationOptions { void MergeTaskOutput( const std::vector &tasks, Matrix *output); +void MergeTaskOutput( + const std::vector &tasks, + CuMatrix *output); /** This class does neural net inference in a way that is optimized for GPU use: From 17b7f3f2b7c1723a57e72edfc26a461a0b6e72cf Mon Sep 17 00:00:00 2001 From: Justin Luitjens Date: Mon, 4 Mar 2019 15:55:54 -0700 Subject: [PATCH 051/235] [src] Add device options to enable tensor core math mode. (#3066) --- src/cudamatrix/cu-device.cc | 10 ++++++++++ src/cudamatrix/cu-device.h | 23 +++++++++++++++++++++++ src/nnet3bin/nnet3-compute-batch.cc | 4 ++++ src/nnet3bin/nnet3-compute.cc | 4 ++++ src/nnet3bin/nnet3-latgen-faster-batch.cc | 4 ++++ src/nnet3bin/nnet3-xvector-compute.cc | 4 ++++ 6 files changed, 49 insertions(+) diff --git a/src/cudamatrix/cu-device.cc b/src/cudamatrix/cu-device.cc index 49c179b3673..140275d3b6e 100644 --- a/src/cudamatrix/cu-device.cc +++ b/src/cudamatrix/cu-device.cc @@ -110,6 +110,14 @@ void CuDevice::Initialize() { // Initialize CUBLAS. CUBLAS_SAFE_CALL(cublasCreate(&cublas_handle_)); CUBLAS_SAFE_CALL(cublasSetStream(cublas_handle_, cudaStreamPerThread)); + + if (device_options_.use_tensor_cores) { + // Enable tensor cores in CUBLAS + // Note if the device does not support tensor cores this will fall back to normal math mode + CUBLAS_SAFE_CALL(cublasSetMathMode(cublas_handle_, + CUBLAS_TENSOR_OP_MATH)); + } + // Initialize the cuSPARSE library CUSPARSE_SAFE_CALL(cusparseCreate(&cusparse_handle_)); CUSPARSE_SAFE_CALL(cusparseSetStream(cusparse_handle_, cudaStreamPerThread)); @@ -525,6 +533,8 @@ CuDevice::~CuDevice() { // Each thread has its own copy of the CuDevice object. // Note: this was declared "static". thread_local CuDevice CuDevice::this_thread_device_; + +CuDevice::CuDeviceOptions CuDevice::device_options_; // define and initialize the static members of the CuDevice object. int32 CuDevice::device_id_ = -1; diff --git a/src/cudamatrix/cu-device.h b/src/cudamatrix/cu-device.h index dc3df7e347d..8816f9d223b 100644 --- a/src/cudamatrix/cu-device.h +++ b/src/cudamatrix/cu-device.h @@ -184,8 +184,31 @@ class CuDevice { /// (i.e. from outside the class), call this only if Enabled() returns true. bool IsComputeExclusive(); + // Register command line options for CUDA device. 
+ // This must be done before calling CuDevice::Initialize() + // Example: + // CuDevice::RegisterDeviceOptions(&po); + // po.Read(argc, argv); + // CuDevice::Initialize(); + static void RegisterDeviceOptions(OptionsItf *po) { + CuDevice::device_options_.Register(po); + } ~CuDevice(); private: + + struct CuDeviceOptions { + bool use_tensor_cores; // Enable tensor cores + CuDeviceOptions () : use_tensor_cores(false) {}; + void Register(OptionsItf *po) { + po->Register("cuda-use-tensor-cores", &use_tensor_cores, + "Enable FP16 tensor math. " + "This is higher performance but less accuracy. " + "This is only recommended for inference."); + } + }; + + static CuDeviceOptions device_options_; + // Default constructor used to initialize this_thread_device_ CuDevice(); CuDevice(CuDevice&); // Disallow. diff --git a/src/nnet3bin/nnet3-compute-batch.cc b/src/nnet3bin/nnet3-compute-batch.cc index b0001c96f57..5d4b9b1db48 100644 --- a/src/nnet3bin/nnet3-compute-batch.cc +++ b/src/nnet3bin/nnet3-compute-batch.cc @@ -80,6 +80,10 @@ int main(int argc, char *argv[]) { "priors stored with the model (in this case, " "a .mdl file is expected as input)."); +#if HAVE_CUDA==1 + CuDevice::RegisterDeviceOptions(&po); +#endif + po.Read(argc, argv); if (po.NumArgs() != 3) { diff --git a/src/nnet3bin/nnet3-compute.cc b/src/nnet3bin/nnet3-compute.cc index 45fde99a4f5..cf133025aae 100644 --- a/src/nnet3bin/nnet3-compute.cc +++ b/src/nnet3bin/nnet3-compute.cc @@ -78,6 +78,10 @@ int main(int argc, char *argv[]) { "priors stored with the model (in this case, " "a .mdl file is expected as input)."); +#if HAVE_CUDA==1 + CuDevice::RegisterDeviceOptions(&po); +#endif + po.Read(argc, argv); if (po.NumArgs() != 3) { diff --git a/src/nnet3bin/nnet3-latgen-faster-batch.cc b/src/nnet3bin/nnet3-latgen-faster-batch.cc index fad2d5ed356..ec52cff9776 100644 --- a/src/nnet3bin/nnet3-latgen-faster-batch.cc +++ b/src/nnet3bin/nnet3-latgen-faster-batch.cc @@ -108,6 +108,10 @@ int main(int argc, char *argv[]) { po.Register("use-gpu", &use_gpu, "yes|no|optional|wait, only has effect if compiled with CUDA"); +#if HAVE_CUDA==1 + CuDevice::RegisterDeviceOptions(&po); +#endif + po.Read(argc, argv); if (po.NumArgs() != 4) { diff --git a/src/nnet3bin/nnet3-xvector-compute.cc b/src/nnet3bin/nnet3-xvector-compute.cc index a4bc89a7def..e327681cf9b 100644 --- a/src/nnet3bin/nnet3-xvector-compute.cc +++ b/src/nnet3bin/nnet3-xvector-compute.cc @@ -113,6 +113,10 @@ int main(int argc, char *argv[]) { po.Register("pad-input", &pad_input, "If true, duplicate the first and " "last frames of the input features as required to equal min-chunk-size."); +#if HAVE_CUDA==1 + CuDevice::RegisterDeviceOptions(&po); +#endif + po.Read(argc, argv); if (po.NumArgs() != 3) { From 0a1f827390bc52e82c67a3cc762afb87ee37204f Mon Sep 17 00:00:00 2001 From: "kkm (aka Kirill Katsnelson)" Date: Mon, 4 Mar 2019 16:53:05 -0800 Subject: [PATCH 052/235] [src] Log nnet3 computation to VLOG, not std::cout (#3072) --- src/nnet3/decodable-simple-looped.cc | 7 ++----- src/nnet3/nnet-computation.h | 27 ++++++++++++++++----------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/nnet3/decodable-simple-looped.cc b/src/nnet3/decodable-simple-looped.cc index 0452304cf55..71aa7daaa17 100644 --- a/src/nnet3/decodable-simple-looped.cc +++ b/src/nnet3/decodable-simple-looped.cc @@ -52,7 +52,6 @@ DecodableNnetSimpleLoopedInfo::DecodableNnetSimpleLoopedInfo( Init(opts, &(am_nnet->GetNnet())); } - void DecodableNnetSimpleLoopedInfo::Init( const NnetSimpleLoopedComputationOptions 
&opts, Nnet *nnet) { @@ -86,10 +85,8 @@ void DecodableNnetSimpleLoopedInfo::Init( CompileLooped(*nnet, opts.optimize_config, request1, request2, request3, &computation); computation.ComputeCudaIndexes(); - if (GetVerboseLevel() >= 3) { - KALDI_VLOG(3) << "Computation is:"; - computation.Print(std::cerr, *nnet); - } + KALDI_VLOG(3) << "Computation is:\n" + << NnetComputationPrintInserter{computation, *nnet}; } diff --git a/src/nnet3/nnet-computation.h b/src/nnet3/nnet-computation.h index 97d8b9045ea..a3571eeb532 100644 --- a/src/nnet3/nnet-computation.h +++ b/src/nnet3/nnet-computation.h @@ -514,17 +514,22 @@ struct NnetComputation { NnetComputation(): need_model_derivative(false) { } }; - - - -// This operator is to print out the NnetComputation in a human-readable way, for -// debugging purposes. -// We don't give Read and Write functions to struct NnetComputation, because we -// don't anticipate needing to write it to disk. -std::ostream &operator << (std::ostream &os, - NnetComputation &computation); - - +// A helper class equipped with the stream insertion operator<< to print out +// the NnetComputation in a human-readable way, with NnetComputation::Print(), +// for debugging purposes, e.g.: +// KALDI_VLOG(3) << NnetComputationPrintInserter{mycomputation, mynet}; +struct NnetComputationPrintInserter { + const NnetComputation& computation; + const Nnet& nnet; + void Print(std::ostream& os) const { + computation.Print(os, nnet); + } + friend inline std::ostream &operator <<(std::ostream &os, + NnetComputationPrintInserter xhis) { + xhis.Print(os); + return os; + } +}; } // namespace nnet3 } // namespace kaldi From f2a89c232a0932a6cc79cb786990ef340ca131ef Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Mon, 4 Mar 2019 19:55:14 -0500 Subject: [PATCH 053/235] [src] Allow upsampling in compute-mfcc-feats, etc. (#3014) --- src/feat/feature-common-inl.h | 32 ++++++++++---------- src/feat/feature-window.h | 15 ++++++---- src/feat/resample.cc | 16 +++++----- src/feat/resample.h | 55 ++++++++++++++++++++++------------- 4 files changed, 69 insertions(+), 49 deletions(-) diff --git a/src/feat/feature-common-inl.h b/src/feat/feature-common-inl.h index b9c5794a629..26127a4dc4d 100644 --- a/src/feat/feature-common-inl.h +++ b/src/feat/feature-common-inl.h @@ -33,26 +33,26 @@ void OfflineFeatureTpl::ComputeFeatures( Matrix *output) { KALDI_ASSERT(output != NULL); BaseFloat new_sample_freq = computer_.GetFrameOptions().samp_freq; - if (sample_freq == new_sample_freq) + if (sample_freq == new_sample_freq) { Compute(wave, vtln_warp, output); - else { - if (new_sample_freq < sample_freq) { - if (! computer_.GetFrameOptions().allow_downsample) + } else { + if (new_sample_freq < sample_freq && + ! computer_.GetFrameOptions().allow_downsample) KALDI_ERR << "Waveform and config sample Frequency mismatch: " << sample_freq << " .vs " << new_sample_freq - << " ( use --allow_downsample=true option to allow " + << " (use --allow-downsample=true to allow " << " downsampling the waveform)."; - - // Downsample the waveform. - Vector downsampled_wave(wave); - DownsampleWaveForm(sample_freq, wave, - new_sample_freq, &downsampled_wave); - Compute(downsampled_wave, vtln_warp, output); - } else - KALDI_ERR << "New sample Frequency " << new_sample_freq - << " is larger than waveform original sampling frequency " - << sample_freq; - + else if (new_sample_freq > sample_freq && + ! 
computer_.GetFrameOptions().allow_upsample) + KALDI_ERR << "Waveform and config sample Frequency mismatch: " + << sample_freq << " .vs " << new_sample_freq + << " (use --allow-upsample=true option to allow " + << " upsampling the waveform)."; + // Resample the waveform. + Vector resampled_wave(wave); + ResampleWaveform(sample_freq, wave, + new_sample_freq, &resampled_wave); + Compute(resampled_wave, vtln_warp, output); } } diff --git a/src/feat/feature-window.h b/src/feat/feature-window.h index c249414259c..e911055368f 100644 --- a/src/feat/feature-window.h +++ b/src/feat/feature-window.h @@ -40,14 +40,15 @@ struct FrameExtractionOptions { BaseFloat preemph_coeff; // Preemphasis coefficient. bool remove_dc_offset; // Subtract mean of wave before FFT. std::string window_type; // e.g. Hamming window - bool round_to_power_of_two; - BaseFloat blackman_coeff; - bool snip_edges; - bool allow_downsample; // May be "hamming", "rectangular", "povey", "hanning", "blackman" // "povey" is a window I made to be similar to Hamming but to go to zero at the // edges, it's pow((0.5 - 0.5*cos(n/N*2*pi)), 0.85) // I just don't think the Hamming window makes sense as a windowing function. + bool round_to_power_of_two; + BaseFloat blackman_coeff; + bool snip_edges; + bool allow_downsample; + bool allow_upsample; FrameExtractionOptions(): samp_freq(16000), frame_shift_ms(10.0), @@ -59,7 +60,8 @@ struct FrameExtractionOptions { round_to_power_of_two(true), blackman_coeff(0.42), snip_edges(true), - allow_downsample(false) { } + allow_downsample(false), + allow_upsample(false) { } void Register(OptionsItf *opts) { opts->Register("sample-frequency", &samp_freq, @@ -90,6 +92,9 @@ struct FrameExtractionOptions { opts->Register("allow-downsample", &allow_downsample, "If true, allow the input waveform to have a higher frequency than " "the specified --sample-frequency (and we'll downsample)."); + opts->Register("allow-upsample", &allow_upsample, + "If true, allow the input waveform to have a lower frequency than " + "the specified --sample-frequency (and we'll upsample)."); } int32 WindowShift() const { return static_cast(samp_freq * 0.001 * frame_shift_ms); diff --git a/src/feat/resample.cc b/src/feat/resample.cc index 518685d85c8..11f4c62bf1c 100644 --- a/src/feat/resample.cc +++ b/src/feat/resample.cc @@ -302,7 +302,7 @@ void ArbitraryResample::Resample(const VectorBase &input, VectorBase *output) const { KALDI_ASSERT(input.Dim() == num_samples_in_ && output->Dim() == weights_.size()); - + int32 output_dim = output->Dim(); for (int32 i = 0; i < output_dim; i++) { SubVector input_part(input, first_index_[i], weights_[i].Dim()); @@ -365,13 +365,13 @@ BaseFloat ArbitraryResample::FilterFunc(BaseFloat t) const { return filter * window; } -void DownsampleWaveForm(BaseFloat orig_freq, const VectorBase &wave, - BaseFloat new_freq, Vector *new_wave) { - KALDI_ASSERT(new_freq < orig_freq); - BaseFloat lowpass_cutoff = 0.99 * 0.5 * new_freq; +void ResampleWaveform(BaseFloat orig_freq, const VectorBase &wave, + BaseFloat new_freq, Vector *new_wave) { + BaseFloat min_freq = std::min(orig_freq, new_freq); + BaseFloat lowpass_cutoff = 0.99 * 0.5 * min_freq; int32 lowpass_filter_width = 6; - LinearResample signal_downsampler(orig_freq, new_freq, - lowpass_cutoff, lowpass_filter_width); - signal_downsampler.Resample(wave, true, new_wave); + LinearResample resampler(orig_freq, new_freq, + lowpass_cutoff, lowpass_filter_width); + resampler.Resample(wave, true, new_wave); } } // namespace kaldi diff --git a/src/feat/resample.h 
b/src/feat/resample.h index cc3e5064863..ecac2ba7566 100644 --- a/src/feat/resample.h +++ b/src/feat/resample.h @@ -40,7 +40,7 @@ namespace kaldi { /** \file[resample.h] - + This header contains declarations of classes for resampling signals. The normal cases of resampling a signal are upsampling and downsampling (increasing and decreasing the sample rate of a signal, respectively), @@ -51,7 +51,7 @@ namespace kaldi { The input signal is always evenly spaced, say sampled with frequency S, and we assume the original signal was band-limited to S/2 or lower. The n'th input sample x_n (with n = 0, 1, ...) is interpreted as the original - signal's value at time n/S. + signal's value at time n/S. For resampling, it is convenient to view the input signal as a continuous function x(t) of t, where each sample x_n becomes a delta function @@ -73,14 +73,14 @@ namespace kaldi { means we window the sinc function out to its first zero on the left and right, w = 2 means the second zero, and so on; we normally choose w to be at least two. We call this num_zeros, not w, in the code. - + Convolving the signal x(t) with this windowed filter h(t) = f(t)g(t) and evaluating the resulting signal s(t) at an arbitrary time t is easy: we have \f[ s(t) = 1/S \sum_n x_n h(t - n/S) \f]. (note: the sign of t - n/S might be wrong, but it doesn't matter as the filter and window are symmetric). This is true for arbitrary values of t. What the class ArbitraryResample does - is to allow you to evaluate the signal for specified values of t. + is to allow you to evaluate the signal for specified values of t. */ @@ -90,7 +90,7 @@ namespace kaldi { don't have to be linearly spaced. The low-pass filter cutoff "filter_cutoff_hz" should be less than half the sample rate; "num_zeros" should probably be at least two preferably more; higher numbers give - sharper filters but will be less efficient. + sharper filters but will be less efficient. */ class ArbitraryResample { public: @@ -115,7 +115,7 @@ class ArbitraryResample { /// This version of the Resample function processes just /// one vector. void Resample(const VectorBase &input, - VectorBase *output) const; + VectorBase *output) const; private: void SetIndexes(const Vector &sample_points); @@ -248,20 +248,35 @@ class LinearResample { ///< previously seen input signal. }; -/// Downsample a waveform. This is a convenience wrapper for the -/// class 'LinearResample'. -/// The low-pass filter cutoff used in 'LinearResample' is 0.99 of half of the -/// new_freq and num_zeros is 6. -/// The downsampling results is also checked wit sox resampling toolkit. -/// Sox design is inspired by Laurent De Soras' paper, -/// https://ccrma.stanford.edu/~jos/resample/Implementation.html -/// It designs low pass filter using pass-band, stop-band, Nyquist freq -/// and stop-band attenuation. -/// e.g. The mainlob for Hanning window is 4pi/M, where the main-lobe width is -/// equal to (pass-band-freq - stop-band-freq). -/// Also the cutoff frequency is equal to (pass-band-freq - stop-band-freq). -void DownsampleWaveForm(BaseFloat orig_freq, const VectorBase &wave, - BaseFloat new_freq, Vector *new_wave); +/** + Downsample or upsample a waveform. This is a convenience wrapper for the + class 'LinearResample'. + The low-pass filter cutoff used in 'LinearResample' is 0.99 of the Nyquist, + where the Nyquist is half of the minimum of (orig_freq, new_freq). The + resampling is done with a symmetric FIR filter with N_z (number of zeros) + as 6. 
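   (Aside: a minimal usage sketch of this wrapper, illustrative only and not part
   of the patch itself; the variable names wave8k and wave16k are made up, and it
   assumes the input vector really holds 8 kHz samples.)

     Vector<BaseFloat> wave8k;    // assume this was filled with 8 kHz audio samples
     Vector<BaseFloat> wave16k;   // output buffer; the call sizes it itself
     ResampleWaveform(8000.0, wave8k, 16000.0, &wave16k);  // upsample 8 kHz -> 16 kHz
     // Downsampling is the same call with the two frequencies swapped.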
+ + We compared the downsampling results with those from the sox resampling + toolkit. + Sox's design is inspired by Laurent De Soras' paper, + https://ccrma.stanford.edu/~jos/resample/Implementation.html + + Note: we expect that while orig_freq and new_freq are of type BaseFloat, they + are actually required to have exact integer values (like 16000 or 8000) with + a ratio between them that can be expressed as a rational number with + reasonably small integer factors. +*/ +void ResampleWaveform(BaseFloat orig_freq, const VectorBase &wave, + BaseFloat new_freq, Vector *new_wave); + + +/// This function is deprecated. It is provided for backward compatibility, to avoid +/// breaking older code. +inline void DownsampleWaveForm(BaseFloat orig_freq, const VectorBase &wave, + BaseFloat new_freq, Vector *new_wave) { + ResampleWaveform(orig_freq, wave, new_freq, new_wave); +} + /// @} End of "addtogroup feat" } // namespace kaldi From 98b45c8d458124a50dd93f0d99edc209ac28f38b Mon Sep 17 00:00:00 2001 From: Ke Li Date: Mon, 4 Mar 2019 19:58:38 -0500 Subject: [PATCH 054/235] [src] fix problem with rand_r being undefined on Android (#3037) --- src/base/kaldi-math.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/base/kaldi-math.cc b/src/base/kaldi-math.cc index 991e46a590c..d87179b0464 100644 --- a/src/base/kaldi-math.cc +++ b/src/base/kaldi-math.cc @@ -21,6 +21,7 @@ #include "base/kaldi-math.h" #ifndef _MSC_VER #include +#include #endif #include #include @@ -42,7 +43,7 @@ int32 RoundUpToNearestPowerOfTwo(int32 n) { static std::mutex _RandMutex; int Rand(struct RandomState* state) { -#if defined(_MSC_VER) || defined(__CYGWIN__) +#if !defined(_POSIX_THREAD_SAFE_FUNCTIONS) // On Windows and Cygwin, just call Rand() return rand(); #else From 197214d33b0e2bccecc9323fc6d454a646e809b9 Mon Sep 17 00:00:00 2001 From: Mingkun Huang Date: Tue, 5 Mar 2019 08:59:46 +0800 Subject: [PATCH 055/235] [egs] Update swbd1_map_words.pl, fix them_1's -> them's (#3052) --- egs/swbd/s5c/local/swbd1_map_words.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/swbd/s5c/local/swbd1_map_words.pl b/egs/swbd/s5c/local/swbd1_map_words.pl index 39f90d72816..125e4de0d61 100755 --- a/egs/swbd/s5c/local/swbd1_map_words.pl +++ b/egs/swbd/s5c/local/swbd1_map_words.pl @@ -44,7 +44,7 @@ # which is a mistake in the input. $a =~ s:^\{(.+)\}$:$1:; # e.g. {YUPPIEDOM} -> YUPPIEDOM $a =~ s:[A-Z]\[([^][])+\][A-Z]:$1-$3:i; # e.g. AMMU[N]IT- -> AMMU-IT- - $a =~ s:_\d$::; # e.g. THEM_1 -> THEM + $a =~ s:_\d::; # e.g. THEM_1 -> THEM, THEM_1's -> THEM's } $A[$n] = $a; } From 991a75cd20fde3a3330b2f993afca288fc987f8c Mon Sep 17 00:00:00 2001 From: "kkm (aka Kirill Katsnelson)" Date: Tue, 5 Mar 2019 18:01:05 -0800 Subject: [PATCH 056/235] [src] Add const overload OnlineNnet2FeaturePipeline::IvectorFeature (#3073) --- src/online2/online-nnet2-feature-pipeline.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/online2/online-nnet2-feature-pipeline.h b/src/online2/online-nnet2-feature-pipeline.h index d8f933a090d..e379f7263ec 100644 --- a/src/online2/online-nnet2-feature-pipeline.h +++ b/src/online2/online-nnet2-feature-pipeline.h @@ -228,18 +228,25 @@ class OnlineNnet2FeaturePipeline: public OnlineFeatureInterface { /// rescoring the lattices, this may not be much of an issue. 
void InputFinished(); - // This function returns the ivector-extracting part of the feature pipeline - // (or NULL if iVectors are not being used); the pointer is owned here and not - // given to the caller. This function is used in nnet3, and also in the - // silence-weighting code used to exclude silence from the iVector estimation. + // This function returns the iVector-extracting part of the feature pipeline + // (or NULL if iVectors are not being used); the pointer ownership is retained + // by this object and not transferred to the caller. This function is used in + // nnet3, and also in the silence-weighting code used to exclude silence from + // the iVector estimation. OnlineIvectorFeature *IvectorFeature() { return ivector_feature_; } + // A const accessor for the iVector extractor. Returns NULL if iVectors are + // not being used. + const OnlineIvectorFeature *IvectorFeature() const { + return ivector_feature_; + } + // This function returns the part of the feature pipeline that would be given // as the primary (non-iVector) input to the neural network in nnet3 // applications. - OnlineFeatureInterface *InputFeature() { + OnlineFeatureInterface *InputFeature() { return feature_plus_optional_pitch_; } From 4432371be83f2229c1215a1c1381ae964f840df8 Mon Sep 17 00:00:00 2001 From: Anton Stakhouski Date: Wed, 6 Mar 2019 19:52:21 +0300 Subject: [PATCH 057/235] [src] Fix syntax error in egs/bn_music_speech/v1/local/make_musan.py (#3074) --- egs/bn_music_speech/v1/local/make_musan.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/egs/bn_music_speech/v1/local/make_musan.py b/egs/bn_music_speech/v1/local/make_musan.py index 942973cfc65..eb739b68180 100755 --- a/egs/bn_music_speech/v1/local/make_musan.py +++ b/egs/bn_music_speech/v1/local/make_musan.py @@ -45,7 +45,7 @@ def prepare_music(root_dir, use_vocals): else: print("Missing file {}".format(utt)) num_bad_files += 1 - print(("In music directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) + print("In music directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def prepare_speech(root_dir): @@ -71,7 +71,7 @@ def prepare_speech(root_dir): else: print("Missing file {}".format(utt)) num_bad_files += 1 - print(("In speech directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) + print("In speech directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def prepare_noise(root_dir): @@ -97,7 +97,7 @@ def prepare_noise(root_dir): else: print("Missing file {}".format(utt)) num_bad_files += 1 - print(("In noise directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) + print("In noise directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def main(): From 8460fa31a177146c0b6b2b75a1ce54678e9033a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20=C5=BBelasko?= Date: Wed, 6 Mar 2019 12:50:58 -0500 Subject: [PATCH 058/235] [src] Memory optimization for online feature extraction of long recordings (#3038) --- src/feat/feature-window.h | 6 +++++ src/feat/online-feature-test.cc | 40 ++++++++++++++++++++++++++++ src/feat/online-feature.cc | 47 ++++++++++++++++++++++++++++----- src/feat/online-feature.h | 38 +++++++++++++++++++++----- 4 files changed, 119 insertions(+), 12 deletions(-) diff --git 
a/src/feat/feature-window.h b/src/feat/feature-window.h index e911055368f..2fccaefb9a1 100644 --- a/src/feat/feature-window.h +++ b/src/feat/feature-window.h @@ -49,6 +49,7 @@ struct FrameExtractionOptions { bool snip_edges; bool allow_downsample; bool allow_upsample; + int max_feature_vectors; FrameExtractionOptions(): samp_freq(16000), frame_shift_ms(10.0), @@ -61,6 +62,7 @@ struct FrameExtractionOptions { blackman_coeff(0.42), snip_edges(true), allow_downsample(false), + max_feature_vectors(-1), allow_upsample(false) { } void Register(OptionsItf *opts) { @@ -92,6 +94,10 @@ struct FrameExtractionOptions { opts->Register("allow-downsample", &allow_downsample, "If true, allow the input waveform to have a higher frequency than " "the specified --sample-frequency (and we'll downsample)."); + opts->Register("max-feature-vectors", &max_feature_vectors, + "Memory optimization. If larger than 0, periodically remove feature " + "vectors so that only this number of the latest feature vectors is " + "retained."); opts->Register("allow-upsample", &allow_upsample, "If true, allow the input waveform to have a lower frequency than " "the specified --sample-frequency (and we'll upsample)."); diff --git a/src/feat/online-feature-test.cc b/src/feat/online-feature-test.cc index e3a1d5f99f3..7ba6c7c32be 100644 --- a/src/feat/online-feature-test.cc +++ b/src/feat/online-feature-test.cc @@ -375,6 +375,45 @@ void TestOnlineAppendFeature() { } } +void TestRecyclingVector() { + RecyclingVector full_vec; + RecyclingVector shrinking_vec(10); + for (int i = 0; i != 100; ++i) { + Vector data(1); + data.Set(i); + full_vec.PushBack(new Vector(data)); + shrinking_vec.PushBack(new Vector(data)); + } + KALDI_ASSERT(full_vec.Size() == 100); + KALDI_ASSERT(shrinking_vec.Size() == 100); + + // full_vec should contain everything + for (int i = 0; i != 100; ++i) { + Vector *data = full_vec.At(i); + KALDI_ASSERT(data != nullptr); + KALDI_ASSERT((*data)(0) == static_cast(i)); + } + + // shrinking_vec may throw an exception for the first 90 elements + int caught_exceptions = 0; + for (int i = 0; i != 90; ++i) { + try { + shrinking_vec.At(i); + } catch (const std::runtime_error &) { + ++caught_exceptions; + } + } + // it may actually store a bit more elements for performance efficiency considerations + KALDI_ASSERT(caught_exceptions >= 80); + + // shrinking_vec should contain the last 10 elements + for (int i = 90; i != 100; ++i) { + Vector *data = shrinking_vec.At(i); + KALDI_ASSERT(data != nullptr); + KALDI_ASSERT((*data)(0) == static_cast(i)); + } +} + } // end namespace kaldi int main() { @@ -387,6 +426,7 @@ int main() { TestOnlinePlp(); TestOnlineTransform(); TestOnlineAppendFeature(); + TestRecyclingVector(); } std::cout << "Test OK.\n"; } diff --git a/src/feat/online-feature.cc b/src/feat/online-feature.cc index 88d21473b9f..813e7b16f0c 100644 --- a/src/feat/online-feature.cc +++ b/src/feat/online-feature.cc @@ -24,18 +24,54 @@ namespace kaldi { +RecyclingVector::RecyclingVector(int items_to_hold) : + items_to_hold_(items_to_hold == 0 ? 
-1 : items_to_hold),
+    first_available_index_(0) {
+}
+
+RecyclingVector::~RecyclingVector() {
+  for (auto *item : items_) {
+    delete item;
+  }
+}
+
+Vector<BaseFloat> *RecyclingVector::At(int index) const {
+  if (index < first_available_index_) {
+    KALDI_ERR << "Attempted to retrieve feature vector that was "
+                 "already removed by the RecyclingVector (index = " << index << "; "
+              << "first_available_index = " << first_available_index_ << "; "
+              << "size = " << Size() << ")";
+  }
+  // 'at' does size checking.
+  return items_.at(index - first_available_index_);
+}
+
+void RecyclingVector::PushBack(Vector<BaseFloat> *item) {
+  if (items_.size() == items_to_hold_) {
+    delete items_.front();
+    items_.pop_front();
+    ++first_available_index_;
+  }
+  items_.push_back(item);
+}
+
+int RecyclingVector::Size() const {
+  return first_available_index_ + items_.size();
+}
+
+
 template <class C>
 void OnlineGenericBaseFeature<C>::GetFrame(int32 frame, VectorBase<BaseFloat> *feat) {
-  // 'at' does size checking.
-  feat->CopyFromVec(*(features_.at(frame)));
+  feat->CopyFromVec(*(features_.At(frame)));
 };
 
 template <class C>
 OnlineGenericBaseFeature<C>::OnlineGenericBaseFeature(
     const typename C::Options &opts):
     computer_(opts), window_function_(computer_.GetFrameOptions()),
-    input_finished_(false), waveform_offset_(0) { }
+    input_finished_(false), waveform_offset_(0),
+    features_(opts.frame_opts.max_feature_vectors) { }
 
 template <class C>
 void OnlineGenericBaseFeature<C>::AcceptWaveform(BaseFloat sampling_rate,
@@ -63,11 +99,10 @@ template <class C>
 void OnlineGenericBaseFeature<C>::ComputeFeatures() {
   const FrameExtractionOptions &frame_opts = computer_.GetFrameOptions();
   int64 num_samples_total = waveform_offset_ + waveform_remainder_.Dim();
-  int32 num_frames_old = features_.size(),
+  int32 num_frames_old = features_.Size(),
       num_frames_new = NumFrames(num_samples_total, frame_opts,
                                  input_finished_);
   KALDI_ASSERT(num_frames_new >= num_frames_old);
-  features_.resize(num_frames_new, NULL);
 
   Vector<BaseFloat> window;
   bool need_raw_log_energy = computer_.NeedRawLogEnergy();
@@ -81,7 +116,7 @@ void OnlineGenericBaseFeature<C>::ComputeFeatures() {
     // note: this online feature-extraction code does not support VTLN.
     BaseFloat vtln_warp = 1.0;
     computer_.Compute(raw_log_energy, vtln_warp, &window, this_feature);
-    features_[frame] = this_feature;
+    features_.PushBack(this_feature);
   }
   // OK, we will now discard any portion of the signal that will not be
   // necessary to compute frames in the future.
diff --git a/src/feat/online-feature.h b/src/feat/online-feature.h
index d41bb6747c7..d47a6b13e9b 100644
--- a/src/feat/online-feature.h
+++ b/src/feat/online-feature.h
@@ -41,6 +41,36 @@ namespace kaldi {
 /// @{
 
+/// This class serves as a storage for feature vectors with an option to limit
+/// the memory usage by removing old elements. The deleted frames indices are
+/// "remembered" so that regardless of the MAX_ITEMS setting, the user always
+/// provides the indices as if no deletion was being performed.
+/// This is useful when processing very long recordings which would otherwise
+/// cause the memory to eventually blow up when the features are not being removed.
+class RecyclingVector {
+public:
+  /// By default it does not remove any elements.
+  RecyclingVector(int items_to_hold = -1);
+
+  /// The ownership is being retained by this collection - do not delete the item.
+  Vector<BaseFloat> *At(int index) const;
+
+  /// The ownership of the item is passed to this collection - do not delete the item.
+  void PushBack(Vector<BaseFloat> *item);
+
+  /// This method returns the size as if no "recycling" had happened,
+  /// i.e.
equivalent to the number of times the PushBack method has been called. + int Size() const; + + ~RecyclingVector(); + +private: + std::deque*> items_; + int items_to_hold_; + int first_available_index_; +}; + + /// This is a templated class for online feature extraction; /// it's templated on a class like MfccComputer or PlpComputer /// that does the basic feature extraction. @@ -61,7 +91,7 @@ class OnlineGenericBaseFeature: public OnlineBaseFeature { return computer_.GetFrameOptions().frame_shift_ms / 1000.0f; } - virtual int32 NumFramesReady() const { return features_.size(); } + virtual int32 NumFramesReady() const { return features_.Size(); } virtual void GetFrame(int32 frame, VectorBase *feat); @@ -88,10 +118,6 @@ class OnlineGenericBaseFeature: public OnlineBaseFeature { ComputeFeatures(); } - ~OnlineGenericBaseFeature() { - DeletePointers(&features_); - } - private: // This function computes any additional feature frames that it is possible to // compute from 'waveform_remainder_', which at this point may contain more @@ -107,7 +133,7 @@ class OnlineGenericBaseFeature: public OnlineBaseFeature { // features_ is the Mfcc or Plp or Fbank features that we have already computed. - std::vector*> features_; + RecyclingVector features_; // True if the user has called "InputFinished()" bool input_finished_; From b801b988e8dc000db0a8870cd41a22dc8d21b253 Mon Sep 17 00:00:00 2001 From: Tomoki Hayashi Date: Fri, 8 Mar 2019 03:10:43 +0900 Subject: [PATCH 059/235] [build] fixed a bug in linux_configure_redhat_fat when use_cuda=no (#3075) --- src/configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/configure b/src/configure index c156f253376..b21cc48f7ee 100755 --- a/src/configure +++ b/src/configure @@ -612,8 +612,8 @@ function linux_configure_redhat_fat { else cat makefiles/linux_atlas.mk >> kaldi.mk fi - echo "Successfully configured for red hat [dynamic libraries, fat] with ATLASLIBS =$ATLASLIBS" $use_cuda && configure_cuda + echo "Successfully configured for red hat [dynamic libraries, fat] with ATLASLIBS =$ATLASLIBS" } function linux_configure_atlas_static { From ce97c472ecf7fa0e048c04aef5f09368b1c94332 Mon Sep 17 00:00:00 2001 From: Hossein Hadian Date: Fri, 8 Mar 2019 03:18:18 +0330 Subject: [PATCH 060/235] [scripts] Add missing '. ./path.sh' to get_utt2num_frames.sh (#3076) --- egs/wsj/s5/utils/data/get_utt2num_frames.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/egs/wsj/s5/utils/data/get_utt2num_frames.sh b/egs/wsj/s5/utils/data/get_utt2num_frames.sh index a6d4f0ecb10..d8b006a5fc0 100755 --- a/egs/wsj/s5/utils/data/get_utt2num_frames.sh +++ b/egs/wsj/s5/utils/data/get_utt2num_frames.sh @@ -10,13 +10,14 @@ frame_shift=0.01 frame_overlap=0.015 . utils/parse_options.sh +. ./path.sh if [ $# -ne 1 ]; then echo "This script writes a file utt2num_frames with the " echo "number of frames in each utterance as measured based on the " echo "duration of the utterances (in utt2dur) and the specified " echo "frame_shift and frame_overlap." 
- echo "Usage: $0 " + echo "Usage: $0 " exit 1 fi From 4d61452acd155160cafaca462d50e32987b14bba Mon Sep 17 00:00:00 2001 From: Hossein Hadian Date: Fri, 8 Mar 2019 03:19:50 +0330 Subject: [PATCH 061/235] [src,scripts,egs] Add count-based biphone tree tying for flat-start chain training (#3007) --- .../chain/e2e/run_tdnnf_flatstart_char.sh | 226 +---------------- .../e2e/tuning/run_tdnnf_flatstart_char1a.sh | 225 +++++++++++++++++ .../e2e/tuning/run_tdnnf_flatstart_char1b.sh | 227 ++++++++++++++++++ .../nnet3/chain/e2e/compute_biphone_stats.py | 72 ++++++ .../s5/steps/nnet3/chain/e2e/prepare_e2e.sh | 30 ++- src/gmmbin/gmm-init-biphone.cc | 167 ++++++++++--- 6 files changed, 687 insertions(+), 260 deletions(-) mode change 100755 => 120000 egs/wsj/s5/local/chain/e2e/run_tdnnf_flatstart_char.sh create mode 100755 egs/wsj/s5/local/chain/e2e/tuning/run_tdnnf_flatstart_char1a.sh create mode 100755 egs/wsj/s5/local/chain/e2e/tuning/run_tdnnf_flatstart_char1b.sh create mode 100755 egs/wsj/s5/steps/nnet3/chain/e2e/compute_biphone_stats.py diff --git a/egs/wsj/s5/local/chain/e2e/run_tdnnf_flatstart_char.sh b/egs/wsj/s5/local/chain/e2e/run_tdnnf_flatstart_char.sh deleted file mode 100755 index 4ab0cf58d53..00000000000 --- a/egs/wsj/s5/local/chain/e2e/run_tdnnf_flatstart_char.sh +++ /dev/null @@ -1,225 +0,0 @@ -#!/bin/bash -# Copyright 2017 Hossein Hadian - -# This script performs chain training in a flat-start manner -# and without building or using any context-dependency tree. -# It does not use ivecors or other forms of speaker adaptation -# It is called from run_e2e_char.sh - -# Note: this script is configured as grapheme-based, if you want -# to run it in phoneme mode, you'll need to change _char -# to _nosp everywhere. - -# This is the same as run_tdnn_lstm_flatstart.sh except it uses -# TDNN-F (and CMVN is disabled). - - -# local/chain/compare_wer.sh exp/chain/e2e_tdnn_lstm_bichar_1a exp/chain/e2e_tdnnf_bichar1a -# System e2e_tdnn_lstm_bichar_1a e2e_tdnnf_bichar1a -# WER dev93 (tgpr) 9.42 8.89 -# WER dev93 (tg) 8.85 8.20 -# WER dev93 (big-dict,tgpr) 7.70 6.96 -# WER dev93 (big-dict,fg) 6.79 6.01 -# WER eval92 (tgpr) 6.42 6.08 -# WER eval92 (tg) 6.11 5.79 -# WER eval92 (big-dict,tgpr) 4.50 4.39 -# WER eval92 (big-dict,fg) 4.09 3.88 -# Final train prob -0.0610 -0.0598 -# Final valid prob -0.0836 -0.0854 -# Final train prob (xent) -# Final valid prob (xent) -# Num-params 9219188 7421044 - -# steps/info/chain_dir_info.pl exp/chain/e2e_tdnnf_bichar1a -# exp/chain/e2e_tdnnf_bichar1a: num-iters=180 nj=2..8 num-params=7.4M dim=40->3444 combine=-0.064->-0.064 (over 3) logprob:train/valid[119,179,final]=(-0.093,-0.060,-0.060/-0.107,-0.086,-0.085) - - -set -e - -# configs for 'chain' -stage=0 -train_stage=-10 -get_egs_stage=-10 -affix=1a - -# training options -dropout_schedule='0,0@0.20,0.5@0.50,0' -num_epochs=10 -num_jobs_initial=2 -num_jobs_final=8 -minibatch_size=150=128,64/300=64,32/600=32,16/1200=8 -common_egs_dir= -l2_regularize=0.00005 -frames_per_iter=3000000 -cmvn_opts="--norm-means=false --norm-vars=false" -train_set=train_si284_spe2e_hires -test_sets="test_dev93 test_eval92" - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -if ! cuda-compiled; then - cat <$lang/topo -fi - -if [ $stage -le 1 ]; then - echo "$0: Estimating a phone language model for the denominator graph..." 
- mkdir -p $treedir/log - $train_cmd $treedir/log/make_phone_lm.log \ - cat data/$train_set/text \| \ - steps/nnet3/chain/e2e/text_to_phones.py --between-silprob 0.1 \ - data/lang_char \| \ - utils/sym2int.pl -f 2- data/lang_char/phones.txt \| \ - chain-est-phone-lm --num-extra-lm-states=2000 \ - ark:- $treedir/phone_lm.fst - steps/nnet3/chain/e2e/prepare_e2e.sh --nj 30 --cmd "$train_cmd" \ - --type biphone \ - --shared-phones true \ - data/$train_set $lang $treedir -fi - -if [ $stage -le 2 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') - tdnn_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim-continuous=true" - tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" - linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" - prefinal_opts="l2-regularize=0.01" - output_opts="l2-regularize=0.005" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - - input dim=40 name=input - - relu-batchnorm-dropout-layer name=tdnn1 input=Append(-1,0,1) $tdnn_opts dim=1024 - tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 - tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 - tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 - tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=0 - tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - linear-component name=prefinal-l dim=192 $linear_opts - - - prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1024 small-dim=192 - output-layer name=output include-log-softmax=false dim=$num_targets $output_opts - -EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs -fi - -if [ $stage -le 3 ]; then - # no need to store the egs in a shared storage because we always - # remove them. Anyway, it takes only 5 minutes to generate them. 
- - steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ - --cmd "$decode_cmd" \ - --feat.cmvn-opts "$cmvn_opts" \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize $l2_regularize \ - --chain.apply-deriv-weights false \ - --egs.dir "$common_egs_dir" \ - --egs.stage $get_egs_stage \ - --egs.opts "" \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch $minibatch_size \ - --trainer.frames-per-iter $frames_per_iter \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.momentum 0 \ - --trainer.optimization.num-jobs-initial $num_jobs_initial \ - --trainer.optimization.num-jobs-final $num_jobs_final \ - --trainer.optimization.initial-effective-lrate 0.0005 \ - --trainer.optimization.final-effective-lrate 0.00005 \ - --trainer.optimization.shrink-value 1.0 \ - --trainer.max-param-change 2.0 \ - --cleanup.remove-egs true \ - --feat-dir data/${train_set} \ - --tree-dir $treedir \ - --dir $dir || exit 1; -fi - -if [ $stage -le 4 ]; then - # The reason we are using data/lang here, instead of $lang, is just to - # emphasize that it's not actually important to give mkgraph.sh the - # lang directory with the matched topology (since it gets the - # topology file from the model). So you could give it a different - # lang directory, one that contained a wordlist and LM of your choice, - # as long as phones.txt was compatible. - - utils/lang/check_phones_compatible.sh \ - data/lang_char_test_tgpr/phones.txt $lang/phones.txt - utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_char_test_tgpr \ - $dir $treedir/graph_tgpr || exit 1; - - utils/lang/check_phones_compatible.sh \ - data/lang_char_test_bd_tgpr/phones.txt $lang/phones.txt - utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_char_test_bd_tgpr \ - $dir $treedir/graph_bd_tgpr || exit 1; -fi - -if [ $stage -le 5 ]; then - frames_per_chunk=150 - rm $dir/.error 2>/dev/null || true - - for data in $test_sets; do - ( - data_affix=$(echo $data | sed s/test_//) - nspk=$(wc -l 3444 combine=-0.064->-0.064 (over 3) logprob:train/valid[119,179,final]=(-0.093,-0.060,-0.060/-0.107,-0.086,-0.085) + + +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +affix=1a + +# training options +dropout_schedule='0,0@0.20,0.5@0.50,0' +num_epochs=10 +num_jobs_initial=2 +num_jobs_final=8 +minibatch_size=150=128,64/300=64,32/600=32,16/1200=8 +common_egs_dir= +l2_regularize=0.00005 +frames_per_iter=3000000 +cmvn_opts="--norm-means=false --norm-vars=false" +train_set=train_si284_spe2e_hires +test_sets="test_dev93 test_eval92" + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 1 ]; then + echo "$0: Estimating a phone language model for the denominator graph..." 
+ mkdir -p $treedir/log + $train_cmd $treedir/log/make_phone_lm.log \ + cat data/$train_set/text \| \ + steps/nnet3/chain/e2e/text_to_phones.py --between-silprob 0.1 \ + data/lang_char \| \ + utils/sym2int.pl -f 2- data/lang_char/phones.txt \| \ + chain-est-phone-lm --num-extra-lm-states=2000 \ + ark:- $treedir/phone_lm.fst + steps/nnet3/chain/e2e/prepare_e2e.sh --nj 30 --cmd "$train_cmd" \ + --type biphone \ + --shared-phones true \ + data/$train_set $lang $treedir +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + tdnn_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim-continuous=true" + tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" + linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" + prefinal_opts="l2-regularize=0.01" + output_opts="l2-regularize=0.005" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + + input dim=40 name=input + + relu-batchnorm-dropout-layer name=tdnn1 input=Append(-1,0,1) $tdnn_opts dim=1024 + tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=0 + tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + linear-component name=prefinal-l dim=192 $linear_opts + + + prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1024 small-dim=192 + output-layer name=output include-log-softmax=false dim=$num_targets $output_opts + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs +fi + +if [ $stage -le 3 ]; then + # no need to store the egs in a shared storage because we always + # remove them. Anyway, it takes only 5 minutes to generate them. 
+ + steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.cmvn-opts "$cmvn_opts" \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize $l2_regularize \ + --chain.apply-deriv-weights false \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "" \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter $frames_per_iter \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.momentum 0 \ + --trainer.optimization.num-jobs-initial $num_jobs_initial \ + --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.initial-effective-lrate 0.0005 \ + --trainer.optimization.final-effective-lrate 0.00005 \ + --trainer.optimization.shrink-value 1.0 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir data/${train_set} \ + --tree-dir $treedir \ + --dir $dir || exit 1; +fi + +if [ $stage -le 4 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/lang/check_phones_compatible.sh \ + data/lang_char_test_tgpr/phones.txt $lang/phones.txt + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_char_test_tgpr \ + $dir $treedir/graph_tgpr || exit 1; + + utils/lang/check_phones_compatible.sh \ + data/lang_char_test_bd_tgpr/phones.txt $lang/phones.txt + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_char_test_bd_tgpr \ + $dir $treedir/graph_bd_tgpr || exit 1; +fi + +if [ $stage -le 5 ]; then + frames_per_chunk=150 + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nspk=$(wc -l 1397 combine=-0.064->-0.064 (over 2) logprob:train/valid[119,179,final]=(-0.086,-0.060,-0.060/-0.099,-0.087,-0.087) + + +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +affix=1b + +# training options +dropout_schedule='0,0@0.20,0.5@0.50,0' +num_epochs=10 +num_jobs_initial=2 +num_jobs_final=8 +minibatch_size=150=128,64/300=64,32/600=32,16/1200=8 +common_egs_dir= +l2_regularize=0.00005 +frames_per_iter=3000000 +cmvn_opts="--norm-means=false --norm-vars=false" +train_set=train_si284_spe2e_hires +test_sets="test_dev93 test_eval92" + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 1 ]; then + echo "$0: Estimating a phone language model for the denominator graph..." 
+ mkdir -p $treedir/log + $train_cmd $treedir/log/make_phone_lm.log \ + cat data/$train_set/text \| \ + steps/nnet3/chain/e2e/text_to_phones.py --between-silprob 0.1 \ + data/lang_char \| \ + utils/sym2int.pl -f 2- data/lang_char/phones.txt \| \ + chain-est-phone-lm --num-extra-lm-states=2000 \ + ark:- $treedir/phone_lm.fst + steps/nnet3/chain/e2e/prepare_e2e.sh --nj 30 --cmd "$train_cmd" \ + --type biphone \ + --shared-phones true \ + --tie true \ + --min-biphone-count 100 \ + --min-monophone-count 20 \ + data/$train_set $lang $treedir +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + tdnn_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim-continuous=true" + tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" + linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" + prefinal_opts="l2-regularize=0.01" + output_opts="l2-regularize=0.005" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + + input dim=40 name=input + + relu-batchnorm-dropout-layer name=tdnn1 input=Append(-1,0,1) $tdnn_opts dim=1024 + tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=0 + tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + linear-component name=prefinal-l dim=192 $linear_opts + + + prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1024 small-dim=192 + output-layer name=output include-log-softmax=false dim=$num_targets $output_opts + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs +fi + +if [ $stage -le 3 ]; then + # no need to store the egs in a shared storage because we always + # remove them. Anyway, it takes only 5 minutes to generate them. 
+ + steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.cmvn-opts "$cmvn_opts" \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize $l2_regularize \ + --chain.apply-deriv-weights false \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "" \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter $frames_per_iter \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.momentum 0 \ + --trainer.optimization.num-jobs-initial $num_jobs_initial \ + --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.initial-effective-lrate 0.0005 \ + --trainer.optimization.final-effective-lrate 0.00005 \ + --trainer.optimization.shrink-value 1.0 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir data/${train_set} \ + --tree-dir $treedir \ + --dir $dir || exit 1; +fi + +if [ $stage -le 4 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/lang/check_phones_compatible.sh \ + data/lang_char_test_tgpr/phones.txt $lang/phones.txt + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_char_test_tgpr \ + $dir $treedir/graph_tgpr || exit 1; + + utils/lang/check_phones_compatible.sh \ + data/lang_char_test_bd_tgpr/phones.txt $lang/phones.txt + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_char_test_bd_tgpr \ + $dir $treedir/graph_bd_tgpr || exit 1; +fi + +if [ $stage -le 5 ]; then + frames_per_chunk=150 + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nspk=$(wc -l + and the second part of the output is monophone counts with the + following format: + """) +parser.add_argument('langdir', type=str) +parser.add_argument('--shared-phones', type=str, choices=['true','false'], + default='true', + help="If true, stats will be collected for shared phones.") + +args = parser.parse_args() +args.shared_phones = True if args.shared_phones == 'true' else False + +# Read phone sets +phone_sets = [] +phones = [] +phone_to_shard_phone = {} +phone_to_shard_phone[0] = 0 # The no-left-context case +with open(join(args.langdir, 'phones/sets.int'), 'r', encoding='latin-1') as f: + for line in f: + phone_set = line.strip().split() + phone_sets.append(phone_set) + for phone in phone_set: + phones.append(phone) + phone_to_shard_phone[phone] = phone_set[0] + +print('Loaded {} phone-sets containing {} phones.'.format(len(phone_sets), + len(phones)), + file=sys.stderr) + +biphone_counts = {} +mono_counts = {} +for line in sys.stdin: + line = line.strip().split() + key = line[0] + line_phones = line[1:] + for pair in zip([0] + line_phones, line_phones): # 0 is for the no left-context case + if args.shared_phones: + pair = (phone_to_shard_phone[pair[0]], phone_to_shard_phone[pair[1]]) + if pair not in biphone_counts: + biphone_counts[pair] = 0 + biphone_counts[pair] += 1 + mono_counts[pair[1]] = 1 if pair[1] not in mono_counts else mono_counts[pair[1]] + 1 + +for phone1 in [0] + phones: + for phone2 in phones: + pair = (phone1, phone2) + shared_pair = ((phone_to_shard_phone[pair[0]], phone_to_shard_phone[pair[1]]) 
+ if args.shared_phones else pair) + count = biphone_counts[shared_pair] if shared_pair in biphone_counts else 0 + if count != 0: + print('{} {} {}'.format(pair[0], pair[1], count)) +for phone in phones: + shared = phone_to_shard_phone[phone] if args.shared_phones else phone + count = mono_counts[shared] if shared in mono_counts else 0 + if count != 0: + print('{} {}'.format(phone, count)) diff --git a/egs/wsj/s5/steps/nnet3/chain/e2e/prepare_e2e.sh b/egs/wsj/s5/steps/nnet3/chain/e2e/prepare_e2e.sh index c211381bf8b..07d5ee8cfb8 100755 --- a/egs/wsj/s5/steps/nnet3/chain/e2e/prepare_e2e.sh +++ b/egs/wsj/s5/steps/nnet3/chain/e2e/prepare_e2e.sh @@ -14,13 +14,23 @@ cmd=run.pl nj=4 stage=0 shared_phones=true -treedir= # if specified, the tree and model will be copied from there +treedir= # If specified, the tree and model will be copied from there # note that it may not be flat start anymore. -type=mono # can be either mono or biphone -- either way +type=mono # Can be either mono or biphone -- either way # the resulting tree is full (i.e. it doesn't do any tying) -ci_silence=false # if true, silence phones will be treated as context independent +ci_silence=false # If true, silence phones will be treated as context independent scale_opts="--transition-scale=0.0 --self-loop-scale=0.0" +tie=false # If true, gmm-init-biphone will do some tying when + # creating the full biphone tree (it won't be full anymore). + # Specifically, it will revert to monophone if the data + # counts for a biphone are smaller than min_biphone_count. + # If the monophone count is also smaller than min_monophone_count, + # it will revert to a shared global phone. Note that this + # only affects biphone models (i.e., type=biphone) which + # use the special chain topology. +min_biphone_count=100 +min_monophone_count=20 # End configuration section. echo "$0 $@" # Print the command line for logging @@ -35,6 +45,7 @@ if [ $# != 3 ]; then echo " --config # config containing options" echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." echo " --type # context dependency type" + echo " --tie # enable/disable count-based tying" exit 1; fi @@ -69,12 +80,23 @@ if $ci_silence; then ci_opt="--ci-phones=$ciphonelist" fi +tie_opts= +if $tie && [[ "$type" = "biphone" ]]; then + cat $data/text | steps/chain/e2e/text_to_phones.py --edge-silprob 0 \ + --between-silprob 0 \ + $lang | \ + cut -d' ' -f 2- | utils/sym2int.pl $lang/phones.txt | \ + steps/chain/e2e/compute_biphone_stats.py $lang >$dir/phone-stats.txt + tie_opts="--min-biphone-count=$min_biphone_count \ +--min-monophone-count=$min_monophone_count --phone-counts=$dir/phone-stats.txt" +fi + if [ $stage -le 0 ]; then if [ -z $treedir ]; then echo "$0: Initializing $type system." # feat dim does not matter here. Just set it to 10 $cmd $dir/log/init_${type}_mdl_tree.log \ - gmm-init-$type $ci_opt $shared_phones_opt $lang/topo 10 \ + gmm-init-$type $tie_opts $ci_opt $shared_phones_opt $lang/topo 10 \ $dir/0.mdl $dir/tree || exit 1; else echo "$0: Copied tree/mdl from $treedir." 
>$dir/log/init_mdl_tree.log diff --git a/src/gmmbin/gmm-init-biphone.cc b/src/gmmbin/gmm-init-biphone.cc index 42a9d1a91a0..0775a5c7b23 100644 --- a/src/gmmbin/gmm-init-biphone.cc +++ b/src/gmmbin/gmm-init-biphone.cc @@ -52,12 +52,14 @@ void ReadSharedPhonesList(std::string rxfilename, std::vector EventMap *GetFullBiphoneStubMap(const std::vector > &phone_sets, const std::vector &phone2num_pdf_classes, - const std::vector &share_roots, - const std::vector &ci_phones_list) { + const std::vector &ci_phones_list, + const std::vector > &bi_counts, + int32 biphone_min_count, + const std::vector &mono_counts, + int32 mono_min_count) { { // Check the inputs - KALDI_ASSERT(!phone_sets.empty() && - share_roots.size() == phone_sets.size()); + KALDI_ASSERT(!phone_sets.empty()); std::set all_phones; for (size_t i = 0; i < phone_sets.size(); i++) { KALDI_ASSERT(IsSortedAndUniq(phone_sets[i])); @@ -81,6 +83,14 @@ EventMap level1_map[ci_phones_list[i]] = new TableEventMap(kPdfClass, level2_map); } + // If there is not enough data for a biphone, we will revert to monophone + // and if there is not enough data for the monophone either, we will revert + // to zerophone (which is like a global garbage pdf) after initializing it. + int32 zerophone_pdf = -1; + // If a monophone state is created for a phone-set, the corresponding pdf will + // be stored in this vector. + std::vector monophone_pdf(phone_sets.size(), -1); + for (size_t i = 0; i < phone_sets.size(); i++) { if (numpdfs_per_phone == 1) { @@ -100,38 +110,68 @@ EventMap level1_map[pset[k]] = new TableEventMap(0, level2_map); } else { KALDI_ASSERT(numpdfs_per_phone == 2); - int32 base_pdfid = current_pdfid; - std::vector pset = phone_sets[i]; // All these will have a shared + std::vector right_phoneset = phone_sets[i]; // All these will have a shared // event-map child - for (size_t k = 0; k < pset.size(); k++) { - // Create an event map for level2: - std::map level2_map; // key is 0 - { - std::map level3_map; // key is kPdfClass + // Create an event map for level2: + std::map level2_map; // key is 0 + { // Handle CI phones + std::map level3_map; // key is kPdfClass + level3_map[0] = current_pdfid++; + level3_map[1] = current_pdfid++; + level2_map[0] = new TableEventMap(kPdfClass, level3_map); // no-left-context case + for (size_t i = 0; i < ci_phones_list.size(); i++) // ci-phone left-context cases + level2_map[ci_phones_list[i]] = new TableEventMap(kPdfClass, level3_map); + } + for (size_t j = 0; j < phone_sets.size(); j++) { + std::vector left_phoneset = phone_sets[j]; // All these will have a + // shared subtree with 2 pdfids + std::map level3_map; // key is kPdfClass + if (bi_counts.empty() || + bi_counts[left_phoneset[0]][right_phoneset[0]] >= biphone_min_count) { level3_map[0] = current_pdfid++; level3_map[1] = current_pdfid++; - level2_map[0] = new TableEventMap(kPdfClass, level3_map); // no-left-context case - for (size_t i = 0; i < ci_phones_list.size(); i++) // ci-phone left-context cases - level2_map[ci_phones_list[i]] = new TableEventMap(kPdfClass, level3_map); + } else if (mono_counts.empty() || + mono_counts[right_phoneset[0]] > mono_min_count) { + // Revert to mono. 
+ KALDI_VLOG(2) << "Reverting to mono for biphone (" << left_phoneset[0] + << "," << right_phoneset[0] << ")"; + if (monophone_pdf[i] == -1) { + KALDI_VLOG(1) << "Reserving mono PDFs for phone-set " << i; + monophone_pdf[i] = current_pdfid++; + current_pdfid++; // num-pdfs-per-phone is 2 + } + level3_map[0] = monophone_pdf[i]; + level3_map[1] = monophone_pdf[i] + 1; + } else { + KALDI_VLOG(2) << "Reverting to zerophone for biphone (" + << left_phoneset[0] + << "," << right_phoneset[0] << ")"; + // Revert to zerophone + if (zerophone_pdf == -1) { + KALDI_VLOG(1) << "Reserving zero PDFs."; + zerophone_pdf = current_pdfid++; + current_pdfid++; // num-pdfs-per-phone is 2 + } + level3_map[0] = zerophone_pdf; + level3_map[1] = zerophone_pdf + 1; } - for (size_t j = 0; j < phone_sets.size(); j++) { - std::map level3_map; // key is kPdfClass - level3_map[0] = current_pdfid++; - level3_map[1] = current_pdfid++; - std::vector ipset = phone_sets[j]; // All these will have a - // shared subtree with 2 pdfids - for (size_t ik = 0; ik < ipset.size(); ik++) { - level2_map[ipset[ik]] = new TableEventMap(kPdfClass, level3_map); - } + for (size_t k = 0; k < left_phoneset.size(); k++) { + int32 left_phone = left_phoneset[k]; + level2_map[left_phone] = new TableEventMap(kPdfClass, level3_map); } - level1_map[pset[k]] = new TableEventMap(0, level2_map); - if (k != pset.size() - 1) - current_pdfid = base_pdfid; + } + for (size_t k = 0; k < right_phoneset.size(); k++) { + std::map level2_copy; + for (auto const& kv: level2_map) + level2_copy[kv.first] = kv.second->Copy(std::vector()); + int32 right_phone = right_phoneset[k]; + level1_map[right_phone] = new TableEventMap(0, level2_copy); } } } + KALDI_LOG << "Num PDFs: " << current_pdfid; return new TableEventMap(1, level1_map); } @@ -139,7 +179,11 @@ EventMap ContextDependency* BiphoneContextDependencyFull(std::vector > phone_sets, const std::vector phone2num_pdf_classes, - const std::vector &ci_phones_list) { + const std::vector &ci_phones_list, + const std::vector > &bi_counts, + int32 biphone_min_count, + const std::vector &mono_counts, + int32 mono_min_count) { // Remove all the CI phones from the phone sets std::set ci_phones; for (size_t i = 0; i < ci_phones_list.size(); i++) @@ -159,13 +203,54 @@ BiphoneContextDependencyFull(std::vector > phone_sets, int32 P = 1, N = 2; EventMap *pdf_map = GetFullBiphoneStubMap(phone_sets, phone2num_pdf_classes, - share_roots, ci_phones_list); + ci_phones_list, bi_counts, + biphone_min_count, mono_counts, + mono_min_count); return new ContextDependency(N, P, pdf_map); } } // end namespace kaldi +/* This function reads the counts of biphones and monophones from a text file + generated for chain flat-start training. On each line there is either a + biphone count or a monophone count: + + + The phone-id's are according to phones.txt. + + It's more efficient to load the biphone counts into a map because + most entries are zero, but since there are not many biphones, a 2-dim vector + is OK. */ +static void ReadPhoneCounts(std::string &filename, int32 num_phones, + std::vector *mono_counts, + std::vector > *bi_counts) { + // The actual phones start from id = 1 (so the last phone has id = num_phones). 
+ mono_counts->resize(num_phones + 1, 0); + bi_counts->resize(num_phones + 1, std::vector(num_phones + 1, 0)); + std::ifstream infile(filename); + std::string line; + while (std::getline(infile, line)) { + std::istringstream iss(line); + int a, b; + long c; + if ((std::istringstream(line) >> a >> b >> c)) { + // It's a biphone count. + KALDI_ASSERT(a >= 0 && a <= num_phones); // 0 means no-left-context + KALDI_ASSERT(b > 0 && b <= num_phones); + KALDI_ASSERT(c >= 0); + (*bi_counts)[a][b] = c; + } else if ((std::istringstream(line) >> b >> c)) { + // It's a monophone count. + KALDI_ASSERT(b > 0 && b <= num_phones); + KALDI_ASSERT(c >= 0); + (*mono_counts)[b] = c; + } else { + KALDI_ERR << "Bad line in phone stats file: " << line; + } + } +} + int main(int argc, char *argv[]) { try { using namespace kaldi; @@ -179,7 +264,8 @@ int main(int argc, char *argv[]) { " gmm-init-biphone topo 39 bi.mdl bi.tree\n"; bool binary = true; - std::string shared_phones_rxfilename; + std::string shared_phones_rxfilename, phone_counts_rxfilename; + int32 min_biphone_count = 100, min_mono_count = 20; std::string ci_phones_str; std::vector ci_phones; // Sorted, uniqe vector of // context-independent phones. @@ -191,6 +277,15 @@ int main(int argc, char *argv[]) { "whose pdfs should be shared."); po.Register("ci-phones", &ci_phones_str, "Colon-separated list of " "integer indices of context-independent phones."); + po.Register("phone-counts", &phone_counts_rxfilename, + "rxfilename containing, on each line, a biphone/phone and " + "its count in the training data."); + po.Register("min-biphone-count", &min_biphone_count, "Minimum number of " + "occurences of a biphone in training data to reserve pdfs " + "for it."); + po.Register("min-monophone-count", &min_mono_count, "Minimum number of " + "occurences of a monophone in training data to reserve pdfs " + "for it."); po.Read(argc, argv); if (po.NumArgs() != 4) { @@ -214,7 +309,6 @@ int main(int argc, char *argv[]) { KALDI_ERR << "Invalid --ci-phones option: " << ci_phones_str; } - Vector glob_inv_var(dim); glob_inv_var.Set(1.0); Vector glob_mean(dim); @@ -235,6 +329,15 @@ int main(int argc, char *argv[]) { phone2num_pdf_classes[phones[i]] == 2); } + std::vector mono_counts; + std::vector > bi_counts; + if (!phone_counts_rxfilename.empty()) { + ReadPhoneCounts(phone_counts_rxfilename, phones.size(), + &mono_counts, &bi_counts); + KALDI_LOG << "Loaded mono/bi phone counts."; + } + + // Now the tree: ContextDependency *ctx_dep = NULL; std::vector > shared_phones; @@ -247,7 +350,9 @@ int main(int argc, char *argv[]) { // ReadSharedPhonesList crashes on error. 
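      // A sketch of how the new tying options are typically combined (the
      // paths, counts and shared-phones file below are illustrative
      // placeholders; in practice steps/nnet3/chain/e2e/prepare_e2e.sh
      // assembles the real arguments):
      //   gmm-init-biphone --ci-phones=<colon-separated-silence-phone-ids> \
      //     --shared-phones=<lang>/phones/sets.int \
      //     --phone-counts=<treedir>/phone-stats.txt \
      //     --min-biphone-count=100 --min-monophone-count=20 \
      //     <lang>/topo 10 <treedir>/0.mdl <treedir>/tree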
} ctx_dep = BiphoneContextDependencyFull(shared_phones, phone2num_pdf_classes, - ci_phones); + ci_phones, bi_counts, + min_biphone_count, + mono_counts, min_mono_count); int32 num_pdfs = ctx_dep->NumPdfs(); From 4767c7ce0aef8db9d2e4bdd708773fc84ef1cf0b Mon Sep 17 00:00:00 2001 From: saikiranvalluri <41471921+saikiranvalluri@users.noreply.github.com> Date: Fri, 8 Mar 2019 22:03:50 +0530 Subject: [PATCH 062/235] Update pocolm_cust.sh --- egs/fisher_callhome_spanish/s5_gigaword/local/pocolm_cust.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/pocolm_cust.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/pocolm_cust.sh index 422db15937a..0e71be29119 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/pocolm_cust.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/pocolm_cust.sh @@ -13,7 +13,7 @@ export PATH=$PATH:$POCOLM_ROOT/scripts wordlist=None num_word=100000 -pocolm_stage=2 +pocolm_stage=1 ngram_order=3 lm_dir= arpa_dir= From 2cd5948302c2f4c787a28d7fc96b700af8f525c3 Mon Sep 17 00:00:00 2001 From: saikiranvalluri <41471921+saikiranvalluri@users.noreply.github.com> Date: Fri, 8 Mar 2019 22:04:58 +0530 Subject: [PATCH 063/235] Update run.sh --- egs/fisher_callhome_spanish/s5_gigaword/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/run.sh b/egs/fisher_callhome_spanish/s5_gigaword/run.sh index 7e488cdc5fa..b63b5208138 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/run.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/run.sh @@ -6,6 +6,7 @@ stage=-1 lmstage=-2 +num_words_pocolm=110000 train_sgmm2=false # call the next line with the directory where the Spanish Fisher data is @@ -96,7 +97,6 @@ if [ $stage -le 0 ]; then cp "$rnnlm_workdir"/normalised_gigaword_corpus/text_normalized "$rnnlm_workdir"/text_lm/spanish_gigaword_normalised.txt fi -num_words_pocolm=110000 if [ $stage -le 1 ]; then local/train_pocolm.sh --stage $lmstage --num-words-pocolm $num_words_pocolm "$rnnlm_workdir"/text_lm/ "$rnnlm_workdir"/pocolm local/get_rnnlm_wordlist.py data/lang/words.txt "$rnnlm_workdir"/pocolm/lm/"$num_words_pocolm"_3.pocolm/words.txt \ From 01cef690034778e7ec113e8f3941ee0e9ac0d4bf Mon Sep 17 00:00:00 2001 From: Desh Raj Date: Fri, 8 Mar 2019 13:52:29 -0500 Subject: [PATCH 064/235] [scripts,egs] Remove sed from various scripts (avoid compatibility problems) (#2981) --- egs/ami/s5/local/ami_ihm_scoring_data_prep.sh | 13 +++++-------- egs/ami/s5/local/ami_mdm_scoring_data_prep.sh | 10 +++------- egs/ami/s5/local/ami_sdm_scoring_data_prep.sh | 10 +++------- egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh | 11 ++++------- egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh | 10 +++------- egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh | 12 ++++-------- .../multi_condition/prepare_impulses_noises.sh | 2 +- .../s5c/local/syllab/generate_syllable_lang.sh | 3 +-- egs/babel/s5d/local/syllab/generate_phone_lang.sh | 3 +-- .../s5d/local/syllab/generate_syllable_lang.sh | 3 +-- egs/bentham/v1/local/create_splits.sh | 6 ++---- .../s5/local/callhome_prepare_dict.sh | 5 ++--- egs/callhome_egyptian/s5/local/ctm.sh | 6 +++--- egs/fisher_callhome_spanish/s5/local/ctm.sh | 6 +++--- .../s5/local/fsp_prepare_dict.sh | 5 ++--- egs/gale_arabic/s5/local/gale_prep_dict.sh | 3 +-- egs/iam/v1/local/prepare_dict.sh | 2 +- egs/iam/v2/local/prepare_dict.sh | 2 +- egs/reverb/s5/local/download_se_eval_tool.sh | 14 +++++++------- egs/yomdle_fa/v1/local/prepare_dict.sh | 2 +- 
egs/yomdle_zh/v1/local/create_download.sh | 2 +- tools/extras/install_portaudio.sh | 2 +- 22 files changed, 51 insertions(+), 81 deletions(-) diff --git a/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh b/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh index 3157d7ffec7..7112e0259a0 100755 --- a/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh +++ b/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh @@ -87,18 +87,15 @@ sort -k 2 $dir/utt2spk | utils/utt2spk_to_spk2utt.pl > $dir/spk2utt || exit 1; join $dir/utt2spk $dir/segments | \ perl -ne '{BEGIN{$pu=""; $pt=0.0;} split; if ($pu eq $_[1] && $pt > $_[3]) { - print "$_[0] $_[2] $_[3] $_[4]>$_[0] $_[2] $pt $_[4]\n" + print "s/^$_[0] $_[2] $_[3] $_[4]\$/$_[0] $_[2] $pt $_[4]/;\n" } - $pu=$_[1]; $pt=$_[4]; + $pu=$_[1]; $pt=$_[4]; }' > $dir/segments_to_fix -if [ `cat $dir/segments_to_fix | wc -l` -gt 0 ]; then + +if [ -s $dir/segments_to_fix ]; then echo "$0. Applying following fixes to segments" cat $dir/segments_to_fix - while read line; do - p1=`echo $line | awk -F'>' '{print $1}'` - p2=`echo $line | awk -F'>' '{print $2}'` - sed -ir "s!$p1!$p2!" $dir/segments - done < $dir/segments_to_fix + perl -i -pf $dir/segments_to_fix $dir/segments fi # Copy stuff into its final locations diff --git a/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh b/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh index 4cfa9110edf..9c4b55308f2 100755 --- a/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh +++ b/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh @@ -94,19 +94,15 @@ awk '{print $1}' $tmpdir/segments | \ join $tmpdir/utt2spk_stm $tmpdir/segments | \ awk '{ utt=$1; spk=$2; wav=$3; t_beg=$4; t_end=$5; if(spk_prev == spk && t_end_prev > t_beg) { - print utt, wav, t_beg, t_end">"utt, wav, t_end_prev, t_end; + print "s/^"utt, wav, t_beg, t_end"$/"utt, wav, t_end_prev, t_end"/;"; } spk_prev=spk; t_end_prev=t_end; }' > $tmpdir/segments_to_fix -if [ `cat $tmpdir/segments_to_fix | wc -l` -gt 0 ]; then +if [ -s $tmpdir/segments_to_fix ]; then echo "$0. Applying following fixes to segments" cat $tmpdir/segments_to_fix - while read line; do - p1=`echo $line | awk -F'>' '{print $1}'` - p2=`echo $line | awk -F'>' '{print $2}'` - sed -ir "s:$p1:$p2:" $tmpdir/segments - done < $tmpdir/segments_to_fix + perl -i -pf $tmpdir/segments_to_fix $tmpdir/segments fi # Copy stuff into its final locations [this has been moved from the format_data diff --git a/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh b/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh index 91baa37d6e1..815e1b2d270 100755 --- a/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh +++ b/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh @@ -101,19 +101,15 @@ awk '{print $1}' $tmpdir/segments | \ join $tmpdir/utt2spk_stm $tmpdir/segments | \ awk '{ utt=$1; spk=$2; wav=$3; t_beg=$4; t_end=$5; if(spk_prev == spk && t_end_prev > t_beg) { - print utt, wav, t_beg, t_end">"utt, wav, t_end_prev, t_end; + print "s/^"utt, wav, t_beg, t_end"$/"utt, wav, t_end_prev, t_end"/;"; } spk_prev=spk; t_end_prev=t_end; }' > $tmpdir/segments_to_fix -if [ `cat $tmpdir/segments_to_fix | wc -l` -gt 0 ]; then +if [ -s $tmpdir/segments_to_fix ]; then echo "$0. 
Applying following fixes to segments" cat $tmpdir/segments_to_fix - while read line; do - p1=`echo $line | awk -F'>' '{print $1}'` - p2=`echo $line | awk -F'>' '{print $2}'` - sed -ir "s:$p1:$p2:" $tmpdir/segments - done < $tmpdir/segments_to_fix + perl -i -pf $tmpdir/segments_to_fix $tmpdir/segments fi # Copy stuff into its final locations [this has been moved from the format_data diff --git a/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh b/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh index 746c42c4c1a..c54876331f1 100755 --- a/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh +++ b/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh @@ -93,18 +93,15 @@ sort -k 2 $dir/utt2spk | utils/utt2spk_to_spk2utt.pl > $dir/spk2utt || exit 1; join $dir/utt2spk $dir/segments | \ perl -ne '{BEGIN{$pu=""; $pt=0.0;} split; if ($pu eq $_[1] && $pt > $_[3]) { - print "$_[0] $_[2] $_[3] $_[4]>$_[0] $_[2] $pt $_[4]\n" + print "s/^$_[0] $_[2] $_[3] $_[4]\$/$_[0] $_[2] $pt $_[4]/;\n" } $pu=$_[1]; $pt=$_[4]; }' > $dir/segments_to_fix -if [ `cat $dir/segments_to_fix | wc -l` -gt 0 ]; then + +if [ -s $dir/segments_to_fix ]; then echo "$0. Applying following fixes to segments" cat $dir/segments_to_fix - while read line; do - p1=`echo $line | awk -F'>' '{print $1}'` - p2=`echo $line | awk -F'>' '{print $2}'` - sed -ir "s!$p1!$p2!" $dir/segments - done < $dir/segments_to_fix + perl -i -pf $dir/segments_to_fix $dir/segments fi # Copy stuff into its final locations diff --git a/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh b/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh index 65f514f223c..475ef5405ba 100755 --- a/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh +++ b/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh @@ -99,19 +99,15 @@ awk '{print $1}' $tmpdir/segments | \ join $tmpdir/utt2spk_stm $tmpdir/segments | \ awk '{ utt=$1; spk=$2; wav=$3; t_beg=$4; t_end=$5; if(spk_prev == spk && t_end_prev > t_beg) { - print utt, wav, t_beg, t_end">"utt, wav, t_end_prev, t_end; + print "s/^"utt, wav, t_beg, t_end"$/"utt, wav, t_end_prev, t_end"/;"; } spk_prev=spk; t_end_prev=t_end; }' > $tmpdir/segments_to_fix -if [ `cat $tmpdir/segments_to_fix | wc -l` -gt 0 ]; then +if [ -s $tmpdir/segments_to_fix ]; then echo "$0. Applying following fixes to segments" cat $tmpdir/segments_to_fix - while read line; do - p1=`echo $line | awk -F'>' '{print $1}'` - p2=`echo $line | awk -F'>' '{print $2}'` - sed -ir "s:$p1:$p2:" $tmpdir/segments - done < $tmpdir/segments_to_fix + perl -i -pf $tmpdir/segments_to_fix $tmpdir/segments fi # Copy stuff into its final locations [this has been moved from the format_data diff --git a/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh b/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh index 1378f8b8965..d7ce038c0a7 100755 --- a/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh +++ b/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh @@ -111,25 +111,21 @@ awk '{print $1}' $tmpdir/segments | \ join $tmpdir/utt2spk_stm $tmpdir/segments | \ awk '{ utt=$1; spk=$2; wav=$3; t_beg=$4; t_end=$5; if(spk_prev == spk && t_end_prev > t_beg) { - print utt, wav, t_beg, t_end">"utt, wav, t_end_prev, t_end; + print "s/^"utt, wav, t_beg, t_end"$/"utt, wav, t_end_prev, t_end"/;"; } spk_prev=spk; t_end_prev=t_end; }' > $tmpdir/segments_to_fix -if [ `cat $tmpdir/segments_to_fix | wc -l` -gt 0 ]; then +if [ -s $tmpdir/segments_to_fix ]; then echo "$0. 
Applying following fixes to segments" cat $tmpdir/segments_to_fix - while read line; do - p1=`echo $line | awk -F'>' '{print $1}'` - p2=`echo $line | awk -F'>' '{print $2}'` - sed -ir "s:$p1:$p2:" $tmpdir/segments - done < $tmpdir/segments_to_fix + perl -i -pf $tmpdir/segments_to_fix $tmpdir/segments fi # Copy stuff into its final locations [this has been moved from the format_data # script] mkdir -p $dir -for f in spk2utt utt2spk utt2spk_stm wav.scp text segments reco2file_and_channel; do +for f in segments_to_fix spk2utt utt2spk utt2spk_stm wav.scp text segments reco2file_and_channel; do cp $tmpdir/$f $dir/$f || exit 1; done diff --git a/egs/aspire/s5/local/multi_condition/prepare_impulses_noises.sh b/egs/aspire/s5/local/multi_condition/prepare_impulses_noises.sh index 804de611cae..8297cdee9ca 100755 --- a/egs/aspire/s5/local/multi_condition/prepare_impulses_noises.sh +++ b/egs/aspire/s5/local/multi_condition/prepare_impulses_noises.sh @@ -114,7 +114,7 @@ cp ${output_dir}_non_normalized/info/* $output_dir/info # rename file location in the noise-rir pairing files for file in `ls $output_dir/info/noise_impulse*`; do - sed -i "s/_non_normalized//g" $file + perl -i -pe "s/_non_normalized//g" $file done # generating the rir-list with probabilities alloted for each rir diff --git a/egs/babel/s5c/local/syllab/generate_syllable_lang.sh b/egs/babel/s5c/local/syllab/generate_syllable_lang.sh index 2d1fcb2259e..4a0810b9415 100755 --- a/egs/babel/s5c/local/syllab/generate_syllable_lang.sh +++ b/egs/babel/s5c/local/syllab/generate_syllable_lang.sh @@ -118,8 +118,7 @@ ln -s lex.syllabs2phones.disambig.fst $out/L_disambig.fst echo "Validating the output lang dir" utils/validate_lang.pl $out || exit 1 -sed -i'' 's/#1$//g' $lout/lexicon.txt -sed -i'' 's/#1$//g' $lout/lexiconp.txt +perl -i -pe 's/#1$//g' $lout/lexicon.txt $lout/lexiconp.txt echo "Done OK." exit 0 diff --git a/egs/babel/s5d/local/syllab/generate_phone_lang.sh b/egs/babel/s5d/local/syllab/generate_phone_lang.sh index fc21a23231b..81d8a0acdc7 100755 --- a/egs/babel/s5d/local/syllab/generate_phone_lang.sh +++ b/egs/babel/s5d/local/syllab/generate_phone_lang.sh @@ -122,8 +122,7 @@ ln -s lex.syllabs2phones.disambig.fst $out/L_disambig.fst echo "Validating the output lang dir" utils/validate_lang.pl $out || exit 1 -sed -i'' 's/#1$//g' $lout/lexicon.txt -sed -i'' 's/#1$//g' $lout/lexiconp.txt +perl -i -pe 's/#1$//g' $lout/lexicon.txt $lout/lexiconp.txt echo "Done OK." exit 0 diff --git a/egs/babel/s5d/local/syllab/generate_syllable_lang.sh b/egs/babel/s5d/local/syllab/generate_syllable_lang.sh index db7b0902425..a7bd667027c 100755 --- a/egs/babel/s5d/local/syllab/generate_syllable_lang.sh +++ b/egs/babel/s5d/local/syllab/generate_syllable_lang.sh @@ -122,8 +122,7 @@ ln -s lex.syllabs2phones.disambig.fst $out/L_disambig.fst echo "Validating the output lang dir" utils/validate_lang.pl $out || exit 1 -sed -i'' 's/#1$//g' $lout/lexicon.txt -sed -i'' 's/#1$//g' $lout/lexiconp.txt +perl -i -pe 's/#1$//g' $lout/lexicon.txt $lout/lexiconp.txt echo "Done OK." 
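# Note on the sed -> perl substitutions throughout this commit: GNU sed's
# in-place flag ("sed -i 's/x/y/' file") and BSD/macOS sed's ("sed -i '' ...")
# take incompatible arguments, which is the kind of compatibility problem this
# commit avoids. A perl one-liner behaves identically on both and can edit
# several files at once, as in the replacement above:
#   perl -i -pe 's/#1$//g' $lout/lexicon.txt $lout/lexiconp.txt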
exit 0 diff --git a/egs/bentham/v1/local/create_splits.sh b/egs/bentham/v1/local/create_splits.sh index 93e8bf1b12e..e8ea2279a49 100755 --- a/egs/bentham/v1/local/create_splits.sh +++ b/egs/bentham/v1/local/create_splits.sh @@ -27,10 +27,8 @@ function split { echo $name $lines_dir"/"$name".png" >> $split_dir/images.scp echo $name $spkid >> $split_dir/utt2spk done < "$line_file" - - sed -i '/^\s*$/d' $split_dir/images.scp - sed -i '/^\s*$/d' $split_dir/text - sed -i '/^\s*$/d' $split_dir/utt2spk + + perl -i -ne 'print if /\S/' $split_dir/images.scp $split_dir/text $split_dir/utt2spk utils/utt2spk_to_spk2utt.pl $split_dir/utt2spk > $split_dir/spk2utt } diff --git a/egs/callhome_egyptian/s5/local/callhome_prepare_dict.sh b/egs/callhome_egyptian/s5/local/callhome_prepare_dict.sh index 62bca974e53..d9faa97f266 100755 --- a/egs/callhome_egyptian/s5/local/callhome_prepare_dict.sh +++ b/egs/callhome_egyptian/s5/local/callhome_prepare_dict.sh @@ -54,9 +54,8 @@ cat $dir/silence_phones.txt| awk '{printf("%s ", $1);} END{printf "\n";}' > \ $dir/extra_questions.txt || exit 1; # Add prons for laughter, noise, oov -for w in `grep -v sil $dir/silence_phones.txt`; do -sed -i "/\[$w\]/d" $tmpdir/lexicon.3 -done +w=$(grep -v sil $dir/silence_phones.txt | tr '\n' '|') +perl -i -ne "print unless /\[(${w%?})\]/" $tmpdir/lexicon.3 for w in `grep -v sil $dir/silence_phones.txt`; do echo "[$w] $w" diff --git a/egs/callhome_egyptian/s5/local/ctm.sh b/egs/callhome_egyptian/s5/local/ctm.sh index 14056b7a44b..64a7cf0d4f6 100755 --- a/egs/callhome_egyptian/s5/local/ctm.sh +++ b/egs/callhome_egyptian/s5/local/ctm.sh @@ -18,9 +18,9 @@ fi steps/get_ctm.sh $data_dir $lang_dir $decode_dir # Make sure that channel markers match -#sed -i "s:\s.*_fsp-([AB]): \1:g" data/dev/stm -#ls exp/tri5a/decode_dev/score_*/dev.ctm | xargs -I {} sed -i -r 's:fsp\s1\s:fsp A :g' {} -#ls exp/tri5a/decode_dev/score_*/dev.ctm | xargs -I {} sed -i -r 's:fsp\s2\s:fsp B :g' {} +#perl -i -pe "s:\s.*_fsp-([AB]): \1:g" data/dev/stm +#ls exp/tri5a/decode_dev/score_*/dev.ctm | xargs -I {} perl -i -pe 's:fsp\s1\s:fsp A :g' {} +#ls exp/tri5a/decode_dev/score_*/dev.ctm | xargs -I {} perl -i -pe 's:fsp\s2\s:fsp B :g' {} # Get the environment variables . /export/babel/data/software/env.sh diff --git a/egs/fisher_callhome_spanish/s5/local/ctm.sh b/egs/fisher_callhome_spanish/s5/local/ctm.sh index 7d09f574580..62860a10b7b 100755 --- a/egs/fisher_callhome_spanish/s5/local/ctm.sh +++ b/egs/fisher_callhome_spanish/s5/local/ctm.sh @@ -19,9 +19,9 @@ fi steps/get_ctm.sh $data_dir $lang_dir $decode_dir # Make sure that channel markers match -#sed -i "s:\s.*_fsp-([AB]): \1:g" data/dev/stm -#ls exp/tri5a/decode_dev/score_*/dev.ctm | xargs -I {} sed -i -r 's:fsp\s1\s:fsp A :g' {} -#ls exp/tri5a/decode_dev/score_*/dev.ctm | xargs -I {} sed -i -r 's:fsp\s2\s:fsp B :g' {} +#perl -i -pe "s:\s.*_fsp-([AB]): \1:g" data/dev/stm +#ls exp/tri5a/decode_dev/score_*/dev.ctm | xargs -I {} perl -i -pe 's:fsp\s1\s:fsp A :g' {} +#ls exp/tri5a/decode_dev/score_*/dev.ctm | xargs -I {} perl -i -pe 's:fsp\s2\s:fsp B :g' {} # Get the environment variables . 
/export/babel/data/software/env.sh diff --git a/egs/fisher_callhome_spanish/s5/local/fsp_prepare_dict.sh b/egs/fisher_callhome_spanish/s5/local/fsp_prepare_dict.sh index 7b2de2db392..779298305c4 100755 --- a/egs/fisher_callhome_spanish/s5/local/fsp_prepare_dict.sh +++ b/egs/fisher_callhome_spanish/s5/local/fsp_prepare_dict.sh @@ -105,9 +105,8 @@ if [ $stage -le 4 ]; then cp "$tmpdir/lexicon.1" "$tmpdir/lexicon.2" # Add prons for laughter, noise, oov - for w in `grep -v sil $dir/silence_phones.txt`; do - sed -i "/\[$w\]/d" $tmpdir/lexicon.2 - done + w=$(grep -v sil $dir/silence_phones.txt | tr '\n' '|') + perl -i -ne "print unless /\[(${w%?})\]/" $tmpdir/lexicon.2 for w in `grep -v sil $dir/silence_phones.txt`; do echo "[$w] $w" diff --git a/egs/gale_arabic/s5/local/gale_prep_dict.sh b/egs/gale_arabic/s5/local/gale_prep_dict.sh index 74ef789eda7..f6fd83378d0 100755 --- a/egs/gale_arabic/s5/local/gale_prep_dict.sh +++ b/egs/gale_arabic/s5/local/gale_prep_dict.sh @@ -25,9 +25,8 @@ echo SIL > $dir/optional_silence.txt cat $dir/lexicon.txt | cut -d ' ' -f2- | tr -s ' ' '\n' |\ sort -u > $dir/nonsilence_phones.txt || exit 1; +perl -i -pe 'print " SIL\n" if $.==1' $dir/lexicon.txt - sed -i '1i SIL' $dir/lexicon.txt - echo Dictionary preparation succeeded exit 0 diff --git a/egs/iam/v1/local/prepare_dict.sh b/egs/iam/v1/local/prepare_dict.sh index f691d577fba..7451f6b85f7 100755 --- a/egs/iam/v1/local/prepare_dict.sh +++ b/egs/iam/v1/local/prepare_dict.sh @@ -38,7 +38,7 @@ while(<>){ }' | sort -u > $dir/lexicon.txt -sed -i "s/#//" $dir/nonsilence_phones.txt +perl -i -pe "s/#//" $dir/nonsilence_phones.txt echo ' SIL' >> $dir/lexicon.txt echo ' SIL' >> $dir/lexicon.txt diff --git a/egs/iam/v2/local/prepare_dict.sh b/egs/iam/v2/local/prepare_dict.sh index e21a59c7e92..714b5b51788 100755 --- a/egs/iam/v2/local/prepare_dict.sh +++ b/egs/iam/v2/local/prepare_dict.sh @@ -39,7 +39,7 @@ while(<>){ }' | sort -u > $dir/lexicon.txt -sed -i "s/#//" $dir/nonsilence_phones.txt +perl -i -pe "s/#//" $dir/nonsilence_phones.txt echo ' SIL' >> $dir/lexicon.txt diff --git a/egs/reverb/s5/local/download_se_eval_tool.sh b/egs/reverb/s5/local/download_se_eval_tool.sh index c7b272907b6..0d7bb8305ea 100755 --- a/egs/reverb/s5/local/download_se_eval_tool.sh +++ b/egs/reverb/s5/local/download_se_eval_tool.sh @@ -18,14 +18,14 @@ unzip REVERB_scores.zip -d local/REVERB_scores_source rm REVERB_scores.zip pushd local/REVERB_scores_source/REVERB-SPEENHA.Release04Oct/evaltools -sed -i 's/wavread/audioread/g' prog/score_sim.m +perl -i -pe 's/wavread/audioread/g' prog/score_sim.m git clone https://github.com/MuSAELab/SRMRToolbox.git -sed -i 's/wavread/audioread/g' SRMRToolbox/libs/preprocess.m -sed -i 's/SRMR_main/SRMR/g' prog/score_real.m -sed -i 's/SRMR_main/SRMR/g' prog/score_sim.m -sed -i 's/+wb\ //g' prog/calcpesq.m -sed -i 's/pesq_/_pesq_/g' prog/calcpesq.m -sed -ie '30d;31d' prog/calcpesq.m +perl -i -pe 's/wavread/audioread/g' SRMRToolbox/libs/preprocess.m +perl -i -pe 's/SRMR_main/SRMR/g' prog/score_real.m +perl -i -pe 's/SRMR_main/SRMR/g' prog/score_sim.m +perl -i -pe 's/\+wb //g' prog/calcpesq.m +perl -i -pe 's/pesq_/_pesq_/g' prog/calcpesq.m +perl -n -i -e 'print unless /remove target file name/' prog/calcpesq.m patch score_RealData.m -i ../../../score_RealData.patch -o score_RealData_new.m mv score_RealData_new.m score_RealData.m patch score_SimData.m -i ../../../score_SimData.patch -o score_SimData_new.m diff --git a/egs/yomdle_fa/v1/local/prepare_dict.sh b/egs/yomdle_fa/v1/local/prepare_dict.sh index 
f1b1a8d70cc..8d14130d8c0 100755 --- a/egs/yomdle_fa/v1/local/prepare_dict.sh +++ b/egs/yomdle_fa/v1/local/prepare_dict.sh @@ -18,7 +18,7 @@ mkdir -p $dir local/prepare_lexicon.py --data-dir $data_dir $dir -sed -i '/^\s*$/d' $dir/lexicon.txt +perl -i -ne 'print if /\S/' $dir/lexicon.txt cut -d' ' -f2- $dir/lexicon.txt | sed 's/SIL//g' | tr ' ' '\n' | sort -u | sed '/^$/d' >$dir/nonsilence_phones.txt || exit 1; echo ' SIL' >> $dir/lexicon.txt diff --git a/egs/yomdle_zh/v1/local/create_download.sh b/egs/yomdle_zh/v1/local/create_download.sh index a440a331747..1daad354473 100755 --- a/egs/yomdle_zh/v1/local/create_download.sh +++ b/egs/yomdle_zh/v1/local/create_download.sh @@ -43,4 +43,4 @@ local/create_line_image_from_page_image.py \ echo "Downloading table for CangJie." wget -P $download_dir/ $cangjie_url || exit 1; -sed -ie '1,8d' $download_dir/cj5-cc.txt +perl -n -i -e 'print if $. > 8' $download_dir/cj5-cc.txt diff --git a/tools/extras/install_portaudio.sh b/tools/extras/install_portaudio.sh index ed9529477a6..58797f554e8 100755 --- a/tools/extras/install_portaudio.sh +++ b/tools/extras/install_portaudio.sh @@ -82,7 +82,7 @@ if [ -z "$MACOS" ]; then fi ./configure --prefix=`pwd`/install --with-pic -sed -i.bk '40s:src/common/pa_ringbuffer.o::g; 40s:$: src/common/pa_ringbuffer.o:' Makefile +perl -i -pe 's:src/common/pa_ringbuffer.o:: if /^OTHER_OBJS\s*=/' Makefile if [ "$MACOS" != "" ]; then echo "detected MacOS operating system ... trying to fix Makefile" From 2f95609f0bb085bd3a1dc5eb0a39f3edea59e606 Mon Sep 17 00:00:00 2001 From: "kkm (aka Kirill Katsnelson)" Date: Fri, 8 Mar 2019 12:07:19 -0800 Subject: [PATCH 065/235] [src] Rework error logging for safety and cleanliness (#3064) --- src/base/io-funcs-inl.h | 6 +- src/base/io-funcs.cc | 2 +- src/base/io-funcs.h | 2 +- src/base/kaldi-error-test.cc | 7 +- src/base/kaldi-error.cc | 256 ++++++++++---------------- src/base/kaldi-error.h | 186 +++++++++++-------- src/base/kaldi-math.cc | 6 +- src/bin/align-text.cc | 32 ++-- src/bin/draw-tree.cc | 12 +- src/feat/pitch-functions-test.cc | 4 +- src/ivector/logistic-regression.cc | 9 +- src/kwsbin/compute-atwv.cc | 5 +- src/lat/determinize-lattice-pruned.cc | 3 +- src/latbin/lattice-expand-ngram.cc | 18 +- src/lm/arpa-file-parser.cc | 2 +- src/lm/arpa-lm-compiler-test.cc | 3 +- src/matrix/tp-matrix.cc | 11 +- src/matrix/tp-matrix.h | 13 +- src/nnetbin/cuda-gpu-available.cc | 8 +- src/online/online-audio-source.cc | 8 +- src/online/online-audio-source.h | 2 +- src/onlinebin/online-audio-client.cc | 27 +-- src/tree/build-tree-utils.cc | 31 ++-- src/tree/cluster-utils-test.cc | 13 +- src/util/kaldi-table.h | 3 +- src/util/parse-options.cc | 9 +- 26 files changed, 313 insertions(+), 365 deletions(-) diff --git a/src/base/io-funcs-inl.h b/src/base/io-funcs-inl.h index 6b87f4c1a24..b703ef5addc 100644 --- a/src/base/io-funcs-inl.h +++ b/src/base/io-funcs-inl.h @@ -47,7 +47,7 @@ template void WriteBasicType(std::ostream &os, os << t << " "; } if (os.fail()) { - throw std::runtime_error("Write failure in WriteBasicType."); + KALDI_ERR << "Write failure in WriteBasicType."; } } @@ -122,7 +122,7 @@ inline void WriteIntegerPairVector(std::ostream &os, bool binary, os << "]\n"; } if (os.fail()) { - throw std::runtime_error("Write failure in WriteIntegerPairVector."); + KALDI_ERR << "Write failure in WriteIntegerPairVector."; } } @@ -224,7 +224,7 @@ template inline void WriteIntegerVector(std::ostream &os, bool binary, os << "]\n"; } if (os.fail()) { - throw std::runtime_error("Write failure in 
WriteIntegerVector."); + KALDI_ERR << "Write failure in WriteIntegerVector."; } } diff --git a/src/base/io-funcs.cc b/src/base/io-funcs.cc index 90988faf3ea..ff9c921874e 100644 --- a/src/base/io-funcs.cc +++ b/src/base/io-funcs.cc @@ -138,7 +138,7 @@ void WriteToken(std::ostream &os, bool binary, const char *token) { CheckToken(token); // make sure it's valid (can be read back) os << token << " "; if (os.fail()) { - throw std::runtime_error("Write failure in WriteToken."); + KALDI_ERR << "Write failure in WriteToken."; } } diff --git a/src/base/io-funcs.h b/src/base/io-funcs.h index 6c2b690f54c..b3015905785 100644 --- a/src/base/io-funcs.h +++ b/src/base/io-funcs.h @@ -46,7 +46,7 @@ namespace kaldi { We also want to have control over whitespace in text mode without affecting the meaning of the file, for pretty-printing purposes. - Errors are handled by throwing an exception (std::runtime_error). + Errors are handled by throwing a KaldiFatalError exception. For integer and floating-point types (and boolean values): diff --git a/src/base/kaldi-error-test.cc b/src/base/kaldi-error-test.cc index 527de852cac..462ad956907 100644 --- a/src/base/kaldi-error-test.cc +++ b/src/base/kaldi-error-test.cc @@ -42,13 +42,12 @@ void UnitTestError() { } // end namespace kaldi. int main() { - kaldi::g_program_name = "/foo/bar/kaldi-error-test"; + kaldi::SetProgramName("/foo/bar/kaldi-error-test"); try { kaldi::UnitTestError(); KALDI_ASSERT(0); // should not happen. exit(1); - } catch(std::runtime_error &r) { - std::cout << "UnitTestError: the error we generated was: " << r.what(); + } catch(kaldi::KaldiFatalError &e) { + std::cout << "The error we generated was: '" << e.KaldiMessage() << "'\n"; } } - diff --git a/src/base/kaldi-error.cc b/src/base/kaldi-error.cc index df03e85f148..9705936466c 100644 --- a/src/base/kaldi-error.cc +++ b/src/base/kaldi-error.cc @@ -1,5 +1,6 @@ // base/kaldi-error.cc +// Copyright 2019 SmartAction LLC (kkm) // Copyright 2016 Brno University of Technology (author: Karel Vesely) // Copyright 2009-2011 Microsoft Corporation; Lukas Burget; Ondrej Glembek @@ -35,88 +36,90 @@ namespace kaldi { + /***** GLOBAL VARIABLES FOR LOGGING *****/ int32 g_kaldi_verbose_level = 0; -const char *g_program_name = NULL; -static LogHandler g_log_handler = NULL; - -// If the program name was set (g_program_name != ""), GetProgramName -// returns the program name (without the path), e.g. "gmm-align". -// Otherwise it returns the empty string "". -const char *GetProgramName() { - return g_program_name == NULL ? "" : g_program_name; +static std::string program_name; +static LogHandler log_handler = NULL; + +void SetProgramName(const char *basename) { + // Using the 'static std::string' for the program name is mostly harmless, + // because (a) Kaldi logging is undefined before main(), and (b) no stdc++ + // string implementation has been found in the wild that would not be just + // an empty string when zero-initialized but not yet constructed. + program_name = basename; } + /***** HELPER FUNCTIONS *****/ -// Given a filename like "/a/b/c/d/e/f.cc", GetShortFileName -// returns "e/f.cc". Does not currently work if backslash is -// the filename separator. -static const char *GetShortFileName(const char *filename) { - const char *last_slash = strrchr(filename, '/'); - if (!last_slash) { - return filename; - } else { - while (last_slash > filename && last_slash[-1] != '/') - last_slash--; - return last_slash; +// Trim filename to at most 1 trailing directory long. 
Given a filename like +// "/a/b/c/d/e/f.cc", return "e/f.cc". Support both '/' and '\' as the path +// separator. +static const char *GetShortFileName(const char *path) { + if (path == nullptr) + return ""; + + const char *prev = path, *last = path; + while ((path = std::strpbrk(path, "\\/")) != nullptr) { + ++path; + prev = last; + last = path; } + return prev; } -/***** STACKTRACE *****/ +/***** STACK TRACE *****/ +#ifdef HAVE_EXECINFO_H static std::string Demangle(std::string trace_name) { -#if defined(HAVE_CXXABI_H) && defined(HAVE_EXECINFO_H) - // at input the string looks like: +#ifdef HAVE_CXXABI_H + // At input the string looks like: // ./kaldi-error-test(_ZN5kaldi13UnitTestErrorEv+0xb) [0x804965d] - // We want to extract the name e.g. '_ZN5kaldi13UnitTestErrorEv", - // demangle it and return it. + // We want to extract the name e.g. '_ZN5kaldi13UnitTestErrorEv" + // and demangle it. - // try to locate '(' and '+', take the string in between, + // Try to locate '(' and '+', take the string in between. size_t begin(trace_name.find("(")), end(trace_name.rfind("+")); if (begin != std::string::npos && end != std::string::npos && begin < end) { - trace_name = trace_name.substr(begin+1,end-(begin+1)); + trace_name = trace_name.substr(begin + 1, end - (begin + 1)); } - // demangle, + // Try to demangle function name. int status; char *demangled_name = abi::__cxa_demangle(trace_name.c_str(), 0, 0, &status); - std::string ans; - if (status == 0) { - ans = demangled_name; + if (status == 0 && demangled_name != NULL) { + trace_name = demangled_name; free(demangled_name); - } else { - ans = trace_name; } - // return, - return ans; -#else +#endif // HAVE_CXXABI_H return trace_name; -#endif } - +#endif // HAVE_EXECINFO_H static std::string KaldiGetStackTrace() { std::string ans; #ifdef HAVE_EXECINFO_H -#define KALDI_MAX_TRACE_SIZE 50 -#define KALDI_MAX_TRACE_PRINT 20 // must be even. - // buffer for the trace, + const size_t KALDI_MAX_TRACE_SIZE = 50; + const size_t KALDI_MAX_TRACE_PRINT = 20; // Must be even. + // Buffer for the trace. void *trace[KALDI_MAX_TRACE_SIZE]; - // get the trace, + // Get the trace. size_t size = backtrace(trace, KALDI_MAX_TRACE_SIZE); - // get the trace symbols, + // Get the trace symbols. char **trace_symbol = backtrace_symbols(trace, size); + if (trace_symbol == NULL) + return ans; - // Compose the 'string', + // Compose a human-readable backtrace string. ans += "[ Stack-Trace: ]\n"; if (size <= KALDI_MAX_TRACE_PRINT) { for (size_t i = 0; i < size; i++) { ans += Demangle(trace_symbol[i]) + "\n"; } - } else { // print out first+last (e.g.) 5. + } else { // Print out first+last (e.g.) 5. for (size_t i = 0; i < KALDI_MAX_TRACE_PRINT/2; i++) { ans += Demangle(trace_symbol[i]) + "\n"; } @@ -125,11 +128,12 @@ static std::string KaldiGetStackTrace() { ans += Demangle(trace_symbol[i]) + "\n"; } if (size == KALDI_MAX_TRACE_SIZE) - ans += ".\n.\n.\n"; // stack was too long, probably a bug. + ans += ".\n.\n.\n"; // Stack was too long, probably a bug. } - // cleanup, - free(trace_symbol); // it's okay, just the pointers, not the strings. + // We must free the array of pointers allocated by backtrace_symbols(), + // but not the strings themselves. + free(trace_symbol); #endif // HAVE_EXECINFO_H return ans; } @@ -142,118 +146,55 @@ MessageLogger::MessageLogger(LogMessageEnvelope::Severity severity, // Obviously, we assume the strings survive the destruction of this object. 
envelope_.severity = severity; envelope_.func = func; - envelope_.file = GetShortFileName(file); // Pointer inside 'file'. + envelope_.file = GetShortFileName(file); // Points inside 'file'. envelope_.line = line; } +void MessageLogger::LogMessage() const { + // Send to the logging handler if provided. + if (log_handler != NULL) { + log_handler(envelope_, GetMessage().c_str()); + return; + } -MessageLogger::~MessageLogger() noexcept(false) { - std::string str = GetMessage(); - // print the mesage (or send to logging handler), - MessageLogger::HandleMessage(envelope_, str.c_str()); -} - -std::string MessageLogger::GetMessage() const { - // remove trailing '\n', - std::string str = ss_.str(); - while (!str.empty() && str[str.length() - 1] == '\n') - str.resize(str.length() - 1); - return str; -} - - -void MessageLogger::HandleMessage(const LogMessageEnvelope &envelope, - const char *message) { - // Send to a logging handler if provided. - if (g_log_handler != NULL) { - g_log_handler(envelope, message); + // Otherwise, use the default Kaldi logging. + // Build the log-message header. + std::stringstream full_message; + if (envelope_.severity > LogMessageEnvelope::kInfo) { + full_message << "VLOG[" << envelope_.severity << "] ("; } else { - // Otherwise, we use the default Kaldi logging. - // Build the log-message 'header', - std::stringstream header; - if (envelope.severity > LogMessageEnvelope::kInfo) { - header << "VLOG[" << envelope.severity << "] ("; - } else { - switch (envelope.severity) { - case LogMessageEnvelope::kInfo : - header << "LOG ("; - break; - case LogMessageEnvelope::kWarning : - header << "WARNING ("; - break; - case LogMessageEnvelope::kError : - header << "ERROR ("; - break; - case LogMessageEnvelope::kAssertFailed : - header << "ASSERTION_FAILED ("; - break; - default: - abort(); // coding error (unknown 'severity'), - } + switch (envelope_.severity) { + case LogMessageEnvelope::kInfo : + full_message << "LOG ("; + break; + case LogMessageEnvelope::kWarning : + full_message << "WARNING ("; + break; + case LogMessageEnvelope::kAssertFailed : + full_message << "ASSERTION_FAILED ("; + break; + case LogMessageEnvelope::kError : + default: // If not the ERROR, it still an error! + full_message << "ERROR ("; + break; } - // fill the other info from the envelope, - header << GetProgramName() << "[" KALDI_VERSION "]" << ':' - << envelope.func << "():" << envelope.file << ':' << envelope.line - << ")"; - - // Printing the message, - if (envelope.severity >= LogMessageEnvelope::kWarning) { - // VLOG, LOG, WARNING: - fprintf(stderr, "%s %s\n", header.str().c_str(), message); - } else { - // ERROR, ASSERT_FAILED (print with stack-trace): - fprintf(stderr, "%s %s\n\n%s\n", header.str().c_str(), message, - KaldiGetStackTrace().c_str()); + } + // Add other info from the envelope and the message text. + full_message << program_name.c_str() << "[" KALDI_VERSION "]" << ':' + << envelope_.func << "():" << envelope_.file << ':' + << envelope_.line << ") " << GetMessage().c_str(); + + // Add stack trace for errors and assertion failures, if available. 
+ if (envelope_.severity < LogMessageEnvelope::kWarning) { + const std::string& stack_trace = KaldiGetStackTrace(); + if (!stack_trace.empty()) { + full_message << "\n\n" << stack_trace; } } -} -FatalMessageLogger::FatalMessageLogger(LogMessageEnvelope::Severity severity, - const char *func, const char *file, - int32 line): - MessageLogger(severity, func, file, line) { - if (severity != LogMessageEnvelope::kAssertFailed && - severity != LogMessageEnvelope::kError) { - // Don't use KALDI_ERR, since that will recursively instantiate - // MessageLogger. - throw std::runtime_error("FatalMessageLogger should be called only with " - "severities kAssertFailed and kError"); - } -} -#if defined(_MSC_VER) -FatalMessageLogger::~FatalMessageLogger [[ noreturn ]] () noexcept(false) -#else -[[ noreturn ]] FatalMessageLogger::~FatalMessageLogger() noexcept(false) -#endif -{ - std::string str = GetMessage(); - - // print the mesage (or send to logging handler), - MessageLogger::HandleMessage(envelope_, str.c_str()); - - // Should we throw exception, or abort? - switch (envelope_.severity) { - case LogMessageEnvelope::kAssertFailed: - abort(); // ASSERT_FAILED, - break; - case LogMessageEnvelope::kError: - if (!std::uncaught_exception()) { - // throw exception with empty message, - throw std::runtime_error(""); // KALDI_ERR, - } else { - // If we got here, this thread has already thrown exception, - // and this exception has not yet arrived to its 'catch' clause... - // Throwing a new exception would be unsafe! - // (can happen during 'stack unwinding', if we have 'KALDI_ERR << msg' - // in a destructor of some local object). - abort(); - } - break; - default: // This should never happen, based on constructor's - // preconditions. But we place abort() here so that all - // possible pathways through this function do not return. - abort(); - } + // Print the complete message to stderr. + full_message << "\n"; + std::cerr << full_message.str(); } @@ -261,17 +202,20 @@ FatalMessageLogger::~FatalMessageLogger [[ noreturn ]] () noexcept(false) void KaldiAssertFailure_(const char *func, const char *file, int32 line, const char *cond_str) { - FatalMessageLogger ml(LogMessageEnvelope::kAssertFailed, func, file, line); - ml.stream() << ": '" << cond_str << "' "; + MessageLogger::Log() = + MessageLogger (LogMessageEnvelope::kAssertFailed, func, file, line) + << "Assertion failed: (" << cond_str << ")"; + fflush(NULL); // Flush all pending buffers, abort() may not flush stderr. + std::abort(); } /***** THIRD-PARTY LOG-HANDLER *****/ -LogHandler SetLogHandler(LogHandler new_handler) { - LogHandler old_handler = g_log_handler; - g_log_handler = new_handler; +LogHandler SetLogHandler(LogHandler handler) { + LogHandler old_handler = log_handler; + log_handler = handler; return old_handler; } -} // end namespace kaldi +} // namespace kaldi diff --git a/src/base/kaldi-error.h b/src/base/kaldi-error.h index c643902f01b..c90a18b15f1 100644 --- a/src/base/kaldi-error.h +++ b/src/base/kaldi-error.h @@ -1,5 +1,6 @@ // base/kaldi-error.h +// Copyright 2019 SmartAction LLC (kkm) // Copyright 2016 Brno University of Technology (author: Karel Vesely) // Copyright 2009-2011 Microsoft Corporation; Ondrej Glembek; Lukas Burget; // Saarland University @@ -42,22 +43,23 @@ namespace kaldi { /// \addtogroup error_group /// @{ -/***** VERBOSITY LEVEL *****/ +/***** PROGRAM NAME AND VERBOSITY LEVEL *****/ -/// This is set by util/parse-options.{h, cc} if you set --verbose=? option. 
-extern int32 g_kaldi_verbose_level; +/// Called by ParseOptions to set base name (no directory) of the executing +/// program. The name is printed in logging code along with every message, +/// because in our scripts, we often mix together the stderr of many programs. +/// This function is very thread-unsafe. +void SetProgramName(const char *basename); -/// This is set by util/parse-options.{h, cc} (from argv[0]) and used (if set) -/// in error reporting code to display the name of the program (this is because -/// in our scripts, we often mix together the stderr of many programs). it is -/// the base-name of the program (no directory), followed by ':' We don't use -/// std::string, due to the static initialization order fiasco. -extern const char *g_program_name; +/// This is set by util/parse-options.{h,cc} if you set --verbose=? option. +/// Do not use directly, prefer {Get,Set}VerboseLevel(). +extern int32 g_kaldi_verbose_level; +/// Get verbosity level, usually set via command line '--verbose=' switch. inline int32 GetVerboseLevel() { return g_kaldi_verbose_level; } -/// This should be rarely used; command-line programs set the verbose level -/// automatically from ParseOptions. +/// This should be rarely used, except by programs using Kaldi as library; +/// command-line programs set the verbose level automatically from ParseOptions. inline void SetVerboseLevel(int32 i) { g_kaldi_verbose_level = i; } @@ -65,76 +67,106 @@ inline void SetVerboseLevel(int32 i) { g_kaldi_verbose_level = i; } /// Log message severity and source location info. struct LogMessageEnvelope { + /// Message severity. In addition to these levels, positive values (1 to 6) + /// specify verbose logging level. Verbose messages are produced only when + /// SetVerboseLevel() has been called to set logging level to at least the + /// corresponding value. enum Severity { - kAssertFailed = -3, - kError = -2, - kWarning = -1, - kInfo = 0, + kAssertFailed = -3, //!< Assertion failure. abort() will be called. + kError = -2, //!< Fatal error. KaldiFatalError will be thrown. + kWarning = -1, //!< Indicates a recoverable but abnormal condition. + kInfo = 0, //!< Informational message. }; - // An 'enum Severity' value, or a positive number indicating verbosity level. - int severity; - const char *func; - const char *file; - int32 line; + int severity; //!< A Severity value, or positive verbosity level. + const char *func; //!< Name of the function invoking the logging. + const char *file; //!< Source file name with up to 1 leading directory. + int32 line; // + MessageLogger &operator<<(const T &val) { + ss_ << val; + return *this; + } + + // When assigned a MessageLogger, log its contents. + struct Log final { + void operator=(const MessageLogger& logger) { + logger.LogMessage(); + } + }; - /// The hook for the 'insertion operator', e.g. - /// 'KALDI_LOG << "Message,"', - inline std::ostream &stream() { return ss_; } + // When assigned a MessageLogger, log its contents and then throw + // a KaldiFatalError. 
+ struct LogAndThrow final { + [[ noreturn ]] void operator=(const MessageLogger& logger) { + logger.LogMessage(); + throw KaldiFatalError(logger.GetMessage()); + } + }; -protected: - std::string GetMessage() const; - /// The logging function, - static void HandleMessage(const LogMessageEnvelope &env, const char *msg); +private: + std::string GetMessage() const { return ss_.str(); } + void LogMessage() const; -protected: LogMessageEnvelope envelope_; - -private: std::ostringstream ss_; }; -class FatalMessageLogger: public MessageLogger { -public: - FatalMessageLogger(LogMessageEnvelope::Severity severity, - const char *func, const char *file, int32 line); - - [[ noreturn ]] ~FatalMessageLogger() noexcept(false); -}; - -// The definition of the logging macros, +// Logging macros. #define KALDI_ERR \ - ::kaldi::FatalMessageLogger(::kaldi::LogMessageEnvelope::kError, \ - __func__, __FILE__, __LINE__).stream() + ::kaldi::MessageLogger::LogAndThrow() = \ + ::kaldi::MessageLogger(::kaldi::LogMessageEnvelope::kError, \ + __func__, __FILE__, __LINE__) #define KALDI_WARN \ - ::kaldi::MessageLogger(::kaldi::LogMessageEnvelope::kWarning, \ - __func__, __FILE__, __LINE__).stream() + ::kaldi::MessageLogger::Log() = \ + ::kaldi::MessageLogger(::kaldi::LogMessageEnvelope::kWarning, \ + __func__, __FILE__, __LINE__) #define KALDI_LOG \ - ::kaldi::MessageLogger(::kaldi::LogMessageEnvelope::kInfo, \ - __func__, __FILE__, __LINE__).stream() -#define KALDI_VLOG(v) if ((v) <= ::kaldi::g_kaldi_verbose_level) \ - ::kaldi::MessageLogger((::kaldi::LogMessageEnvelope::Severity)(v), \ - __func__, __FILE__, __LINE__).stream() + ::kaldi::MessageLogger::Log() = \ + ::kaldi::MessageLogger(::kaldi::LogMessageEnvelope::kInfo, \ + __func__, __FILE__, __LINE__) +#define KALDI_VLOG(v) \ + if ((v) <= ::kaldi::GetVerboseLevel()) \ + ::kaldi::MessageLogger::Log() = \ + ::kaldi::MessageLogger((::kaldi::LogMessageEnvelope::Severity)(v), \ + __func__, __FILE__, __LINE__) /***** KALDI ASSERTS *****/ @@ -142,17 +174,8 @@ class FatalMessageLogger: public MessageLogger { [[ noreturn ]] void KaldiAssertFailure_(const char *func, const char *file, int32 line, const char *cond_str); -// Note on KALDI_ASSERT and KALDI_PARANOID_ASSERT -// The original (simple) version of the code was this -// -// #define KALDI_ASSERT(cond) if (!(cond)) -// kaldi::KaldiAssertFailure_(__func__, __FILE__, __LINE__, #cond); +// Note on KALDI_ASSERT and KALDI_PARANOID_ASSERT: // -// That worked well, but we were concerned that it -// could potentially cause a performance issue due to failed branch -// prediction (best practice is to have the if branch be the commonly -// taken one). -// Therefore, we decided to move the call into the else{} branch. // A single block {} around if /else does not work, because it causes // syntax error (unmatched else block) in the following code: // @@ -161,19 +184,21 @@ class FatalMessageLogger: public MessageLogger { // else // SomethingElse(); // -// do {} while(0) -- note there is no semicolon at the end! --- works nicely +// do {} while(0) -- note there is no semicolon at the end! -- works nicely, // and compilers will be able to optimize the loop away (as the condition // is always false). +// +// Also see KALDI_COMPILE_TIME_ASSERT, defined in base/kaldi-utils.h, and +// KALDI_ASSERT_IS_INTEGER_TYPE and KALDI_ASSERT_IS_FLOATING_TYPE, also defined +// there. 
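// How the assignment pattern in the logging macros above works: operator<<
// binds more tightly than operator=, so a statement like (d1 and d2 being
// arbitrary values)
//   KALDI_ERR << "dimension mismatch: " << d1 << " vs. " << d2;
// first streams the whole message into the temporary MessageLogger and only
// then assigns it to LogAndThrow(), which logs the message and throws
// KaldiFatalError; KALDI_WARN / KALDI_LOG assign to Log(), which only logs.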
#ifndef NDEBUG #define KALDI_ASSERT(cond) do { if (cond) (void)0; else \ ::kaldi::KaldiAssertFailure_(__func__, __FILE__, __LINE__, #cond); } while(0) #else #define KALDI_ASSERT(cond) (void)0 #endif -// Also see KALDI_COMPILE_TIME_ASSERT, defined in base/kaldi-utils.h, -// and KALDI_ASSERT_IS_INTEGER_TYPE and KALDI_ASSERT_IS_FLOATING_TYPE, -// also defined there. -// some more expensive asserts only checked if this defined + +// Some more expensive asserts only checked if this defined. #ifdef KALDI_PARANOID #define KALDI_PARANOID_ASSERT(cond) do { if (cond) (void)0; else \ ::kaldi::KaldiAssertFailure_(__func__, __FILE__, __LINE__, #cond); } while(0) @@ -184,14 +209,15 @@ class FatalMessageLogger: public MessageLogger { /***** THIRD-PARTY LOG-HANDLER *****/ -/// Type of third-party logging function, +/// Type of third-party logging function. typedef void (*LogHandler)(const LogMessageEnvelope &envelope, const char *message); /// Set logging handler. If called with a non-NULL function pointer, the -/// function pointed by it is called to send messages to a caller-provided -/// log. If called with NULL pointer, restores default Kaldi error logging to -/// stderr. SetLogHandler is obviously not thread safe. +/// function pointed by it is called to send messages to a caller-provided log. +/// If called with a NULL pointer, restores default Kaldi error logging to +/// stderr. This function is obviously not thread safe; the log handler must be. +/// Returns a previously set logging handler pointer, or NULL. LogHandler SetLogHandler(LogHandler); /// @} end "addtogroup error_group" diff --git a/src/base/kaldi-math.cc b/src/base/kaldi-math.cc index d87179b0464..17271f3c46f 100644 --- a/src/base/kaldi-math.cc +++ b/src/base/kaldi-math.cc @@ -110,10 +110,8 @@ int32 RandInt(int32 min_val, int32 max_val, struct RandomState* state) { return min_val + ( (unsigned int)( (Rand(state)+RAND_MAX*Rand(state))) % (unsigned int)(max_val+1-min_val)); } else { - throw std::runtime_error(std::string() - +"rand_int failed because we do not support " - +"such large random numbers. " - +"(Extend this function)."); + KALDI_ERR << "rand_int failed because we do not support such large " + "random numbers. (Extend this function)."; } } #else diff --git a/src/bin/align-text.cc b/src/bin/align-text.cc index 616dac858d7..1c695675274 100644 --- a/src/bin/align-text.cc +++ b/src/bin/align-text.cc @@ -86,28 +86,34 @@ int main(int argc, char *argv[]) { if (!text2_reader.HasKey(key)) { KALDI_WARN << "Key " << key << " is in " << text1_rspecifier - << ", but not in " << text2_rspecifier; + << ", but not in " << text2_rspecifier; n_fail++; continue; } const std::vector &text1 = text1_reader.Value(); const std::vector &text2 = text2_reader.Value(key); - // Checks if the special symbol is in the string. - KALDI_ASSERT(std::find(text1.begin(), - text1.end(), special_symbol) == text1.end()); - KALDI_ASSERT(std::find(text2.begin(), - text2.end(), special_symbol) == text2.end()); - if (std::find_if(text1.begin(), text1.end(), IsNotToken) != text1.end()) { - KALDI_ERR << "In text1, the utterance " << key << " contains unprintable characters." \ - << "That means there is a problem with the text (such as incorrect encoding)." << std::endl; - return -1; + KALDI_ERR << "In text1, the utterance " << key + << " contains unprintable characters. 
That means there is" + << " a problem with the text (such as incorrect encoding)."; } if (std::find_if(text2.begin(), text2.end(), IsNotToken) != text2.end()) { - KALDI_ERR << "In text2, the utterance " << key << " contains unprintable characters." \ - << "That means there is a problem with the text (such as incorrect encoding)." << std::endl; - return -1; + KALDI_ERR << "In text2, the utterance " << key + << " contains unprintable characters. That means there is" + << " a problem with the text (such as incorrect encoding)."; + } + + // Verify that the special symbol is not in the string. + if (std::find(text1.begin(), text1.end(), special_symbol) != text1.end()){ + KALDI_ERR << "In text1, the utterance " << key + << " contains the special symbol '" << special_symbol + << "'. This is not allowed."; + } + if (std::find(text2.begin(), text2.end(), special_symbol) != text2.end()){ + KALDI_ERR << "In text2, the utterance " << key + << " contains the special symbol '" << special_symbol + << "'. This is not allowed."; } std::vector > aligned; diff --git a/src/bin/draw-tree.cc b/src/bin/draw-tree.cc index c9be5586933..d107ab1cfac 100644 --- a/src/bin/draw-tree.cc +++ b/src/bin/draw-tree.cc @@ -34,25 +34,23 @@ void MakeEvent(std::string &qry, fst::SymbolTable *phone_syms, if (key == kPdfClass) { value = static_cast(atoi(valstr.c_str())); if (value < 0) { // not valid pdf-class - KALDI_ERR << "Bad query: invalid pdf-class (" - << valstr << ')' << std::endl << std::endl; + KALDI_ERR << "Bad query: invalid pdf-class (" << valstr << ')'; } } else { value = static_cast(phone_syms->Find(valstr.c_str())); if (value == -1) { // fst::kNoSymbol - KALDI_ERR << "Bad query: invalid symbol (" - << valstr << ')' << std::endl << std::endl; + KALDI_ERR << "Bad query: invalid symbol (" << valstr << ')'; } } query_event->push_back(std::make_pair(key++, value)); old_found = found + 1; } std::string valstr = qry.substr(old_found); - EventValueType value = static_cast(phone_syms->Find(valstr.c_str())); + EventValueType value = + static_cast(phone_syms->Find(valstr.c_str())); if (value == -1) { // fst::kNoSymbol - KALDI_ERR << "Bad query: invalid symbol (" - << valstr << ')' << std::endl << std::endl; + KALDI_ERR << "Bad query: invalid symbol (" << valstr << ')'; } query_event->push_back(std::make_pair(key, value)); diff --git a/src/feat/pitch-functions-test.cc b/src/feat/pitch-functions-test.cc index 098e590a8e9..0e481c18674 100644 --- a/src/feat/pitch-functions-test.cc +++ b/src/feat/pitch-functions-test.cc @@ -449,7 +449,7 @@ static void UnitTestKeeleNccfBallast() { // use pitch code with default configuration.. PitchExtractionOptions op; op.nccf_ballast = 0.05 * k; - KALDI_LOG << " nccf_ballast " << op.nccf_ballast << std::endl; + KALDI_LOG << " nccf_ballast " << op.nccf_ballast; // compute pitch. 
Matrix m; ComputeKaldiPitch(op, waveform, &m); @@ -493,7 +493,7 @@ static void UnitTestPitchExtractionSpeed() { double tot_time = timer.Elapsed(), speech_time = test_num * waveform.Dim() / wave.SampFreq(); KALDI_LOG << " Pitch extraction time per second of speech is " - << (tot_time / speech_time) << " seconds " << std::endl; + << (tot_time / speech_time) << " seconds."; } } static void UnitTestPitchExtractorCompareKeele() { diff --git a/src/ivector/logistic-regression.cc b/src/ivector/logistic-regression.cc index 5d02c013294..4eae2ebe3d7 100644 --- a/src/ivector/logistic-regression.cc +++ b/src/ivector/logistic-regression.cc @@ -55,8 +55,7 @@ void LogisticRegression::Train(const Matrix &xs, weights_.SetZero(); TrainParameters(xs_with_prior, ys, conf, &xw); - KALDI_LOG << - "Finished training parameters without mixture components." << std::endl; + KALDI_LOG << "Finished training parameters without mixture components."; // If we are using mixture components, we add those components // in MixUp and retrain with the extra weights. @@ -64,8 +63,7 @@ void LogisticRegression::Train(const Matrix &xs, MixUp(ys, num_classes, conf); Matrix xw(xs_num_rows, weights_.NumRows()); TrainParameters(xs_with_prior, ys, conf, &xw); - KALDI_LOG << - "Finished training mixture components." << std::endl; + KALDI_LOG << "Finished training mixture components."; } } @@ -87,8 +85,7 @@ void LogisticRegression::MixUp(const std::vector &ys, static_cast(0)); KALDI_LOG << "Target number mixture components was " << conf.mix_up - << ". Training " << new_dim << " mixture components. " - << std::endl; + << ". Training " << new_dim << " mixture components."; int32 old_dim = weights_.NumRows(), num_components = old_dim, diff --git a/src/kwsbin/compute-atwv.cc b/src/kwsbin/compute-atwv.cc index 6d9f6d2c2bb..0907baf268a 100644 --- a/src/kwsbin/compute-atwv.cc +++ b/src/kwsbin/compute-atwv.cc @@ -112,7 +112,7 @@ int main(int argc, char *argv[]) { if (vals.size() != 4) { KALDI_ERR << "Incorrect format of the reference file" << " -- 4 entries expected, " << vals.size() << " given!\n" - << "Key: " << kwid << std::endl; + << "Key: " << kwid; } KwsTerm inst(kwid, vals); aligner.AddRef(inst); @@ -127,7 +127,7 @@ int main(int argc, char *argv[]) { if (vals.size() != 4) { KALDI_ERR << "Incorrect format of the hypotheses file" << " -- 4 entries expected, " << vals.size() << " given!\n" - << "Key: " << kwid << std::endl; + << "Key: " << kwid; } KwsTerm inst(kwid, vals); aligner.AddHyp(inst); @@ -171,4 +171,3 @@ int main(int argc, char *argv[]) { return -1; } } - diff --git a/src/lat/determinize-lattice-pruned.cc b/src/lat/determinize-lattice-pruned.cc index 447c951d02c..22eae8199ff 100644 --- a/src/lat/determinize-lattice-pruned.cc +++ b/src/lat/determinize-lattice-pruned.cc @@ -665,8 +665,7 @@ template class LatticeDeterminizerPruned { continue; if (opts_.max_loop > 0 && counter++ > opts_.max_loop) { KALDI_ERR << "Lattice determinization aborted since looped more than " - << opts_.max_loop << " times during epsilon closure.\n"; - throw std::runtime_error("looped more than max-arcs times in lattice determinization"); + << opts_.max_loop << " times during epsilon closure."; } for (ArcIterator > aiter(*ifst_, elem.state); !aiter.Done(); aiter.Next()) { const Arc &arc = aiter.Value(); diff --git a/src/latbin/lattice-expand-ngram.cc b/src/latbin/lattice-expand-ngram.cc index 1b8cfbee24b..1e7625d79e0 100644 --- a/src/latbin/lattice-expand-ngram.cc +++ b/src/latbin/lattice-expand-ngram.cc @@ -36,15 +36,15 @@ int main(int argc, char *argv[]) { 
"Usage: lattice-expand-ngram [options] lattice-rspecifier " "lattice-wspecifier\n" "e.g.: lattice-expand-ngram --n=3 ark:lat ark:expanded_lat\n"; - + ParseOptions po(usage); int32 n = 3; std::string word_syms_filename; po.Register("n", &n, "n-gram context to expand to."); - + po.Read(argc, argv); - + if (po.NumArgs() != 2) { po.PrintUsage(); exit(1); @@ -58,10 +58,10 @@ int main(int argc, char *argv[]) { fst::UnweightedNgramFst expand_fst(n); SequentialCompactLatticeReader lat_reader(lats_rspecifier); - CompactLatticeWriter lat_writer(lats_wspecifier); + CompactLatticeWriter lat_writer(lats_wspecifier); int32 n_done = 0, n_fail = 0; - + for (; !lat_reader.Done(); lat_reader.Next()) { std::string key = lat_reader.Key(); KALDI_LOG << "Processing lattice for key " << key; @@ -69,14 +69,14 @@ int main(int argc, char *argv[]) { CompactLattice expanded_lat; ComposeDeterministicOnDemand(lat, &expand_fst, &expanded_lat); if (expanded_lat.Start() == fst::kNoStateId) { - KALDI_WARN << "Empty lattice for utterance " << key << std::endl; + KALDI_WARN << "Empty lattice for utterance " << key; n_fail++; } else { if (lat.NumStates() == expanded_lat.NumStates()) { - KALDI_LOG << "Lattice for key " << key + KALDI_LOG << "Lattice for key " << key << " did not need to be expanded for order " << n << "."; } else { - KALDI_LOG << "Lattice expanded from " << lat.NumStates() << " to " + KALDI_LOG << "Lattice expanded from " << lat.NumStates() << " to " << expanded_lat.NumStates() << " states for order " << n << "."; } lat_writer.Write(key, expanded_lat); @@ -84,7 +84,7 @@ int main(int argc, char *argv[]) { } lat_reader.FreeCurrent(); } - KALDI_LOG << "Processed " << n_done << " lattices with " << n_fail + KALDI_LOG << "Processed " << n_done << " lattices with " << n_fail << " failures."; return 0; } catch(const std::exception &e) { diff --git a/src/lm/arpa-file-parser.cc b/src/lm/arpa-file-parser.cc index f3565eabf4e..53e4a1b61bf 100644 --- a/src/lm/arpa-file-parser.cc +++ b/src/lm/arpa-file-parser.cc @@ -74,7 +74,7 @@ void ArpaFileParser::Read(std::istream &is) { warning_count_ = 0; current_line_.clear(); -#define PARSE_ERR (KALDI_ERR << LineReference() << ": ") +#define PARSE_ERR KALDI_ERR << LineReference() << ": " // Give derived class an opportunity to prepare its state. ReadStarted(); diff --git a/src/lm/arpa-lm-compiler-test.cc b/src/lm/arpa-lm-compiler-test.cc index 697d70c416a..ccfd26af7e5 100644 --- a/src/lm/arpa-lm-compiler-test.cc +++ b/src/lm/arpa-lm-compiler-test.cc @@ -209,8 +209,7 @@ bool ThrowsExceptionTest(bool seps, const string &infile) { // Make memory cleanup easy in both cases of try-catch block. std::unique_ptr compiler(Compile(seps, infile)); return false; - } catch (const std::runtime_error&) { - // Kaldi throws only std::runtime_error in kaldi-error.cc + } catch (const KaldiFatalError&) { return true; } } diff --git a/src/matrix/tp-matrix.cc b/src/matrix/tp-matrix.cc index f01ee1e8f46..6e34dc643e9 100644 --- a/src/matrix/tp-matrix.cc +++ b/src/matrix/tp-matrix.cc @@ -51,7 +51,7 @@ void TpMatrix::Invert() { // format, so we temporarily put in non-packed format. Matrix tmp(*this); int rows = static_cast(this->num_rows_); - + // ATLAS call. It's really row-major ordering and a lower triangular matrix, // but there is some weirdness with Fortran-style indexing that we need to // take account of, so everything gets swapped. 
@@ -102,14 +102,13 @@ void TpMatrix::Cholesky(const SpMatrix &orig) { } // d = orig(j, j) - d; d = orig_jdata[j] - d; - + if (d >= 0.0) { // (*this)(j, j) = std::sqrt(d); jdata[j] = std::sqrt(d); } else { - KALDI_WARN << "Cholesky decomposition failed. Maybe matrix " - "is not positive definite. Throwing error"; - throw std::runtime_error("Cholesky decomposition failed."); + KALDI_ERR << "Cholesky decomposition failed. Maybe matrix " + "is not positive definite."; } } } @@ -144,5 +143,3 @@ template class TpMatrix; template class TpMatrix; } // namespace kaldi - - diff --git a/src/matrix/tp-matrix.h b/src/matrix/tp-matrix.h index b215e73b000..e3b08701543 100644 --- a/src/matrix/tp-matrix.h +++ b/src/matrix/tp-matrix.h @@ -45,11 +45,11 @@ class TpMatrix : public PackedMatrix { /// Copy constructor from CUDA TpMatrix /// This is defined in ../cudamatrix/cu-tp-matrix.cc explicit TpMatrix(const CuTpMatrix &cu); - - + + template explicit TpMatrix(const TpMatrix& orig) : PackedMatrix(orig) {} - + Real operator() (MatrixIndexT r, MatrixIndexT c) const { if (static_cast(c) > static_cast(r)) { @@ -74,9 +74,9 @@ class TpMatrix : public PackedMatrix { return *(this->data_ + (r*(r+1)) / 2 + c); // Duplicating code from PackedMatrix.h } - // Note: Cholesky may throw std::runtime_error + // Note: Cholesky may throw KaldiFatalError. void Cholesky(const SpMatrix& orig); - + void Invert(); // Inverts in double precision. @@ -99,7 +99,7 @@ class TpMatrix : public PackedMatrix { /// This is implemented in ../cudamatrix/cu-tp-matrix.cc void CopyFromMat(const CuTpMatrix &other); - + /// CopyFromTp copies another triangular matrix into this one. void CopyFromTp(const TpMatrix &other) { PackedMatrix::CopyFromPacked(other); @@ -132,4 +132,3 @@ class TpMatrix : public PackedMatrix { #endif - diff --git a/src/nnetbin/cuda-gpu-available.cc b/src/nnetbin/cuda-gpu-available.cc index 390468d3046..41d0227ce08 100644 --- a/src/nnetbin/cuda-gpu-available.cc +++ b/src/nnetbin/cuda-gpu-available.cc @@ -46,8 +46,7 @@ int main(int argc, char *argv[]) try { KALDI_WARN << "Cannot get hostname, " << strerror(errno); } #endif - KALDI_LOG << std::endl << std::endl - << "### IS CUDA GPU AVAILABLE? '" << hostname << "' ###"; + KALDI_LOG << "\n\n### IS CUDA GPU AVAILABLE? '" << hostname << "' ###"; #if HAVE_CUDA == 1 CuDevice::Instantiate().SelectGpuId("yes"); fprintf(stderr, "### HURRAY, WE GOT A CUDA GPU FOR COMPUTATION!!! ##\n\n"); @@ -76,9 +75,9 @@ int main(int argc, char *argv[]) try { return 0; #else std::cerr - << "### CUDA WAS NOT COMPILED IN! ###" << std::endl + << "### CUDA WAS NOT COMPILED IN! 
###\n" << "To support CUDA, you must run 'configure' on a machine " - << "that has the CUDA compiler 'nvcc' available."; + << "that has the CUDA compiler 'nvcc' available.\n"; return 1; #endif } catch (const std::exception &e) { @@ -95,4 +94,3 @@ int main(int argc, char *argv[]) try { << "### - You should see your GPU (burnt GPUs may disappear from the list until reboot),"; return -1; } - diff --git a/src/online/online-audio-source.cc b/src/online/online-audio-source.cc index 7b3c31682aa..5998be0690f 100644 --- a/src/online/online-audio-source.cc +++ b/src/online/online-audio-source.cc @@ -72,18 +72,18 @@ OnlinePaSource::OnlinePaSource(const uint32 timeout, &pa_ringbuf_, sizeof(SampleType), rb_size_ / sizeof(SampleType), ring_buffer_); if (rbs != 0) - throw runtime_error("Unexpected PortAudio ring buffer init error"); + KALDI_ERR << "PortAudio ring buffer init error"; PaError paerr = Pa_Initialize(); if (paerr != paNoError) - throw runtime_error("PortAudio initialization error"); + KALDI_ERR << "PortAudio initialization error"; // Monophone, 16-bit input hardcoded KALDI_ASSERT(sizeof(SampleType) == 2 && "The current OnlinePaSource code assumes 16-bit input"); paerr = Pa_OpenDefaultStream(&pa_stream_, 1, 0, paInt16, sample_rate_, 0, PaCallback, this); if (paerr != paNoError) - throw runtime_error("PortAudio failed to open the default stream"); + KALDI_ERR << "PortAudio failed to open the default stream"; } @@ -103,7 +103,7 @@ bool OnlinePaSource::Read(Vector *data) { if (!pa_started_) { // start stream the first time Read() is called PaError paerr = Pa_StartStream(pa_stream_); if (paerr != paNoError) - throw std::runtime_error("Error while trying to open PortAudio stream"); + KALDI_ERR << "Error while trying to open PortAudio stream"; pa_started_ = true; } Timer timer; diff --git a/src/online/online-audio-source.h b/src/online/online-audio-source.h index d880660d24f..64153e9cd52 100644 --- a/src/online/online-audio-source.h +++ b/src/online/online-audio-source.h @@ -42,7 +42,7 @@ class OnlineAudioSourceItf { // The function returns true if there may be more data, and false if it // knows we are at the end of the stream. // In case an unexpected and unrecoverable error occurs the function throws - // an exception of type std::runtime_error (e.g. by using KALDI_ERR macro). + // an exception of type KaldiFatalError (by using KALDI_ERR macro). // // NOTE: The older version of this interface had a second paramater - "timeout". // We decided to remove it, because we don't envision usage scenarios, diff --git a/src/onlinebin/online-audio-client.cc b/src/onlinebin/online-audio-client.cc index 241aee426cc..577204b65e7 100644 --- a/src/onlinebin/online-audio-client.cc +++ b/src/onlinebin/online-audio-client.cc @@ -85,7 +85,7 @@ int main(int argc, char** argv) { int32 client_desc = socket(AF_INET, SOCK_STREAM, 0); if (client_desc == -1) { - std::cerr << "ERROR: couldn't create socket!" 
<< std::endl; + std::cerr << "ERROR: couldn't create socket!\n"; return -1; } @@ -96,8 +96,8 @@ int main(int argc, char** argv) { if (addr == INADDR_NONE) { hp = gethostbyname(server_addr_str.c_str()); if (hp == NULL) { - std::cerr << "ERROR: couldn't resolve host string: " << server_addr_str - << std::endl; + std::cerr << "ERROR: couldn't resolve host string: " + << server_addr_str << '\n'; close(client_desc); return -1; } @@ -110,13 +110,13 @@ int main(int argc, char** argv) { server.sin_family = AF_INET; server.sin_port = htons(server_port); if (::connect(client_desc, (struct sockaddr*) &server, sizeof(server))) { - std::cerr << "ERROR: couldn't connect to server!" << std::endl; + std::cerr << "ERROR: couldn't connect to server!\n"; close(client_desc); return -1; } KALDI_VLOG(2) << "Connected to KALDI server at host " << server_addr_str - << " port " << server_port << std::endl; + << " port " << server_port; char* pack_buffer = new char[packet_size]; @@ -124,7 +124,7 @@ int main(int argc, char** argv) { for (; !reader.Done(); reader.Next()) { std::string wav_key = reader.Key(); - KALDI_VLOG(2) << "File: " << wav_key << std::endl; + KALDI_VLOG(2) << "File: " << wav_key; const WaveData &wav_data = reader.Value(); @@ -257,8 +257,7 @@ int main(int argc, char** argv) { { float speed = total_input_dur / total_reco_dur; - KALDI_VLOG(2) << "Recognized (" << speed << "xRT): " << reco_output - << std::endl; + KALDI_VLOG(2) << "Recognized (" << speed << "xRT): " << reco_output; } if (htk) { @@ -266,7 +265,8 @@ int main(int argc, char** argv) { std::ofstream htk_file(name.c_str()); for (size_t i = 0; i < results.size(); i++) htk_file << (int) (results[i].start * 10000000) << " " - << (int) (results[i].end * 10000000) << " " << results[i].word << std::endl; + << (int) (results[i].end * 10000000) << " " + << results[i].word << "\n"; htk_file.close(); } @@ -309,12 +309,13 @@ int main(int argc, char** argv) { std::string name = wav_key + ".vtt"; std::ofstream vtt_file(name.c_str()); - vtt_file << "WEBVTT FILE" << std::endl << std::endl; + vtt_file << "WEBVTT FILE\n\n"; for (size_t i = 0; i < subtitles.size(); i++) - vtt_file << (i + 1) << std::endl << TimeToTimecode(subtitles[i].start) - << " --> " << TimeToTimecode(subtitles[i].end) << std::endl - << subtitles[i].word << std::endl << std::endl; + vtt_file << (i + 1) << "\n" + << TimeToTimecode(subtitles[i].start) << " --> " + << TimeToTimecode(subtitles[i].end) << "\n" + << subtitles[i].word << "\n\n"; vtt_file.close(); } diff --git a/src/tree/build-tree-utils.cc b/src/tree/build-tree-utils.cc index 4c9be833185..254d7ec36d8 100644 --- a/src/tree/build-tree-utils.cc +++ b/src/tree/build-tree-utils.cc @@ -400,7 +400,7 @@ BaseFloat FindBestSplitForKey(const BuildTreeStatsType &stats, for (size_t i = 0;i < assignments.size();i++) if (assignments[i] == 1) yes_set.push_back(i); } *yes_set_out = yes_set; - + DeletePointers(&clusters); #ifdef KALDI_PARANOID { // Check the "ans" is correct. @@ -763,10 +763,9 @@ EventMap *GetToLengthMap(const BuildTreeStatsType &stats, int32 P, std::vector stats_by_phone; try { SplitStatsByKey(stats, P, &stats_by_phone); - } catch(const std::runtime_error &err) { - KALDI_ERR << "Caught exception in GetToLengthMap: you seem " - "to have provided invalid stats [no central-phone " - "key]. 
Message was: " << err.what(); + } catch(const KaldiFatalError &) { + KALDI_ERR << + "You seem to have provided invalid stats [no central-phone key]."; } std::map phone_to_length; for (size_t p = 0; p < stats_by_phone.size(); p++) { @@ -774,10 +773,9 @@ EventMap *GetToLengthMap(const BuildTreeStatsType &stats, int32 P, std::vector stats_by_length; try { SplitStatsByKey(stats_by_phone[p], kPdfClass, &stats_by_length); - } catch(const std::runtime_error &err) { - KALDI_ERR << "Caught exception in GetToLengthMap: you seem " - "to have provided invalid stats [no position " - "key]. Message was: " << err.what(); + } catch(const KaldiFatalError &) { + KALDI_ERR << + "You seem to have provided invalid stats [no position key]."; } size_t length = stats_by_length.size(); for (size_t i = 0; i < length; i++) { @@ -868,7 +866,7 @@ EventMap *ClusterEventMapToNClustersRestrictedByMap( int32 *num_removed_ptr) { std::vector split_stats; SplitStatsByMap(stats, e_restrict, &split_stats); - + if (num_clusters_required < split_stats.size()) { KALDI_WARN << "num-clusters-required is less than size of map. Not doing anything."; if (num_removed_ptr) *num_removed_ptr = 0; @@ -904,10 +902,10 @@ EventMap *ClusterEventMapToNClustersRestrictedByMap( if (j > max_index) max_index = j; } } - + normalizer += SumClusterableNormalizer(summed_stats_contiguous[i]); - } else { - // Even if split_stats[i] is empty, a cluster will be assigned to + } else { + // Even if split_stats[i] is empty, a cluster will be assigned to // that. To compensate, we decrease the num-clusters required. num_non_empty_clusters_required--; } @@ -919,7 +917,7 @@ EventMap *ClusterEventMapToNClustersRestrictedByMap( if (num_non_empty_clusters_required > num_non_empty_clusters) { KALDI_WARN << "Cannot get required num-clusters " << num_clusters_required << " as number of non-empty clusters required is larger than " - << " number of non-empty clusters: " << num_non_empty_clusters_required + << " number of non-empty clusters: " << num_non_empty_clusters_required << " > " << num_non_empty_clusters; if (num_removed_ptr) *num_removed_ptr = 0; return e_in.Copy(); @@ -929,7 +927,7 @@ EventMap *ClusterEventMapToNClustersRestrictedByMap( BaseFloat change = ClusterBottomUpCompartmentalized( summed_stats_contiguous, std::numeric_limits::infinity(), - num_non_empty_clusters_required, + num_non_empty_clusters_required, NULL, // don't need clusters out. &assignments); // this algorithm is quadratic, so might be quite slow. @@ -1052,7 +1050,7 @@ EventMap *GetStubMap(int32 P, // Do a split. Recurse. size_t half_sz = phone_sets.size() / 2; std::vector >::const_iterator half_phones = - phone_sets.begin() + half_sz; + phone_sets.begin() + half_sz; std::vector::const_iterator half_share = share_roots.begin() + half_sz; std::vector > phone_sets_1, phone_sets_2; @@ -1127,4 +1125,3 @@ bool ConvertStats(int32 oldN, int32 oldP, int32 newN, int32 newP, } // end namespace kaldi - diff --git a/src/tree/cluster-utils-test.cc b/src/tree/cluster-utils-test.cc index fd5d9690939..8eee3fb5505 100644 --- a/src/tree/cluster-utils-test.cc +++ b/src/tree/cluster-utils-test.cc @@ -97,10 +97,11 @@ static void TestObjfPlus() { AssertEqual(a.Objf(), (BaseFloat)0.0); AssertEqual(b.Objf(), (BaseFloat)0.0); AssertEqual( a.ObjfPlus(b), -0.5 * (1.0-2.5)*(1.0-2.5)); // 0.5 because half-distance, squared = 1/4, times two points... 
- KALDI_LOG << "Non-binary Output: "<<'\n'; - a.Write(KALDI_LOG, false); - KALDI_LOG << "Binary Output: "<<'\n'; - a.Write(KALDI_LOG, true); + KALDI_LOG << "Non-binary Output:"; + a.Write(std::cerr, false); + std::cerr << "\nBinary Output:\n"; + a.Write(std::cerr, true); + std::cerr << "\n"; } static void TestObjfMinus() { @@ -395,7 +396,7 @@ static void TestClusterKMeansVector() { std::vector points; for (size_t j = 0; j < n_clust; j++) { size_t n_points = 1 + Rand() % 5; - + Vector clust_center(dim); clust_center.SetRandn(); for (size_t k = 0; k < n_points; k++) { @@ -573,5 +574,3 @@ int main() { TestClusterBottomUp(); TestRefineClusters(); } - - diff --git a/src/util/kaldi-table.h b/src/util/kaldi-table.h index e3a80b2743b..bb7177ad051 100644 --- a/src/util/kaldi-table.h +++ b/src/util/kaldi-table.h @@ -383,8 +383,7 @@ class TableWriter { // Returns true if open for writing. bool IsOpen() const; - // Write the object. Throws std::runtime_error on error (via the - // KALDI_ERR macro) + // Write the object. Throws KaldiFatalError on error via the KALDI_ERR macro. inline void Write(const std::string &key, const T &value) const; diff --git a/src/util/parse-options.cc b/src/util/parse-options.cc index 2f75cb655f9..667d9e91c94 100644 --- a/src/util/parse-options.cc +++ b/src/util/parse-options.cc @@ -323,14 +323,7 @@ int ParseOptions::Read(int argc, const char *const argv[]) { #else const char *c = strrchr(argv[0], '/'); #endif - if (c == NULL) - c = argv[0]; - else - c++; - char *program_name = new char[strlen(c)+1]; - strcpy(program_name, c); - delete [] g_program_name; - g_program_name = program_name; + SetProgramName(c == NULL ? argv[0] : c + 1); } // first pass: look for config parameter, look for priority for (i = 1; i < argc; i++) { From bcfe3f8c5063e06b632e9b811a4b27531192558d Mon Sep 17 00:00:00 2001 From: Desh Raj Date: Sun, 10 Mar 2019 14:13:58 -0400 Subject: [PATCH 066/235] [src] Change warp-synchronous to cub::BlockReduce (safer but slower) (#3080) --- src/cudamatrix/cu-kernels.cu | 171 ++++++++--------------------------- 1 file changed, 38 insertions(+), 133 deletions(-) diff --git a/src/cudamatrix/cu-kernels.cu b/src/cudamatrix/cu-kernels.cu index 5a5307b9f87..17d56a05772 100644 --- a/src/cudamatrix/cu-kernels.cu +++ b/src/cudamatrix/cu-kernels.cu @@ -28,7 +28,7 @@ #include #include #include "cudamatrix/cu-kernels-ansi.h" - +#include /*********************************************************************** @@ -958,6 +958,7 @@ static void _trace_mat_mat(const Real* A, const Real* B, MatrixDim dA, Real trans[TileDim][TileDim + 1]; Real sum[CU1DBLOCK]; } smem; + // linear thread id; const int32_cuda tid = threadIdx.y * blockDim.x + threadIdx.x; const int32_cuda grid_height = gridDim.y * TileDim; @@ -1021,6 +1022,7 @@ static void _trace_mat_mat(const Real* A, const Real* B, MatrixDim dA, if (tid == 0) { value[blockIdx.y * gridDim.x + blockIdx.x] = smem.sum[0]; } + } // _trace_mat_mat_trans reduce the partial sum to @@ -1030,6 +1032,7 @@ __global__ static void _trace_mat_mat_trans(const Real* A, const Real* B, MatrixDim dA, int B_stride, Real* value) { __shared__ Real ssum[CU1DBLOCK]; + // linear thread id; const int32_cuda tid = threadIdx.y * blockDim.x + threadIdx.x; const int32_cuda j = blockIdx.x * blockDim.x + threadIdx.x; @@ -1046,7 +1049,7 @@ static void _trace_mat_mat_trans(const Real* A, const Real* B, MatrixDim dA, } ssum[tid] = tsum; __syncthreads(); - + // Block reduce # pragma unroll for (int shift = CU1DBLOCK / 2; shift > warpSize; shift >>= 1) { @@ -2485,6 +2488,8 @@ 
template __global__ static void _softmax_reduce(Real*y, const Real*x, MatrixDim d, int src_stride) { __shared__ Real smem[CU1DBLOCK]; + typedef cub::BlockReduce BlockReduceT; + __shared__ typename BlockReduceT::TempStorage temp_storage; const int i = blockIdx.x; const int x_start = i * src_stride; const int y_start = i * d.stride; @@ -2496,24 +2501,9 @@ static void _softmax_reduce(Real*y, const Real*x, MatrixDim d, int src_stride) { for (int j = tid; j < d.cols; j += CU1DBLOCK) { tmax = fmax(tmax, x[x_start + j]); } - smem[tid] = tmax; - __syncthreads(); - - // reduce to 2x warpSize elements per row -# pragma unroll - for (int shift = CU1DBLOCK / 2; shift > warpSize; shift >>= 1) { - if (tid < shift) { - smem[tid] = fmax(smem[tid], smem[tid + shift]); - } - __syncthreads(); - } - - // reduce to 1 element per row - if (tid < warpSize) { -# pragma unroll - for (int shift = warpSize; shift > 0; shift >>= 1) { - smem[tid] = fmax(smem[tid], smem[tid + shift]); - } + tmax = BlockReduceT(temp_storage).Reduce(tmax, cub::Max()); + if (tid == 0) { + smem[0] = tmax; } // broadcast max to all threads @@ -2526,24 +2516,9 @@ static void _softmax_reduce(Real*y, const Real*x, MatrixDim d, int src_stride) { for (int j = tid; j < d.cols; j += CU1DBLOCK) { tsum += exp(x[x_start + j] - max); } - smem[tid] = tsum; - __syncthreads(); - - // reduce to 2x warpSize elements per row -# pragma unroll - for (int shift = CU1DBLOCK / 2; shift > warpSize; shift >>= 1) { - if (tid < shift) { - smem[tid] += smem[tid + shift]; - } - __syncthreads(); - } - - // reduce to 1 element per row - if (tid < warpSize) { -# pragma unroll - for (int shift = warpSize; shift > 0; shift >>= 1) { - smem[tid] += smem[tid + shift]; - } + tsum = BlockReduceT(temp_storage).Sum(tsum); + if (tid == 0) { + smem[0] = tsum; } // broadcast sum to all threads @@ -2577,6 +2552,8 @@ static void _normalize_per_row(Real *y, int y_stride, const Real *x, const int i = blockIdx.x; const int tid = threadIdx.x; const Real* x_row = x + i * x_d.stride; + typedef cub::BlockReduce BlockReduceT; + __shared__ typename BlockReduceT::TempStorage temp_storage; __shared__ Real ssum[CU1DBLOCK]; // Reduce x_j^2 to CU1DBLOCK elements per row @@ -2584,34 +2561,14 @@ static void _normalize_per_row(Real *y, int y_stride, const Real *x, for (int j = tid; j < x_d.cols; j += CU1DBLOCK) { tsum += x_row[j] * x_row[j]; } - ssum[tid] = tsum; + tsum = BlockReduceT(temp_storage).Sum(tsum); __syncthreads(); - - // Tree reduce to 2x warpSize elements per row -# pragma unroll - for (int shift = CU1DBLOCK / 2; shift > warpSize; shift >>= 1) { - if (tid < shift) - ssum[tid] += ssum[tid + shift]; - __syncthreads(); - } - - // Reduce last warp to 1 element per row. - // Threads implicitly synchronized within a warp. - if (tid < warpSize) { -# pragma unroll - for (int shift = warpSize; shift > 0; shift >>= 1) { - ssum[tid] += ssum[tid + shift]; - } - } + const Real kSquaredNormFloor = 1.3552527156068805425e-20; // 2^-66 - if (tid == 0) { - ssum[0] = sqrt( - fmax(ssum[0] / (target_rms * target_rms * x_d.cols), kSquaredNormFloor)); - } + ssum[tid] = sqrt( + fmax(tsum / (target_rms * target_rms * x_d.cols), kSquaredNormFloor)); - // Broadcast floored stddev to all threads. 
- __syncthreads(); const Real stddev_div_target_rms = ssum[0]; const Real scale = Real(1) / stddev_div_target_rms; @@ -2626,7 +2583,6 @@ static void _normalize_per_row(Real *y, int y_stride, const Real *x, } } - template __global__ static void _diff_normalize_per_row(Real *id, int id_stride, const Real *iv, @@ -2722,6 +2678,8 @@ __global__ static void _log_softmax_reduce(Real* y, const Real* x, MatrixDim y_dim, int x_stride) { __shared__ Real smem[CU1DBLOCK]; + typedef cub::BlockReduce BlockReduceT; + __shared__ typename BlockReduceT::TempStorage temp_storage; const int i = blockIdx.x; const int x_start = i * x_stride; const int y_start = i * y_dim.stride; @@ -2733,23 +2691,9 @@ static void _log_softmax_reduce(Real* y, const Real* x, MatrixDim y_dim, for (int j = tid; j < y_dim.cols; j += CU1DBLOCK) { tmax = fmax(tmax, x[x_start + j]); } - smem[tid] = tmax; - __syncthreads(); - - // reduce to 2x warpSize elements per row -# pragma unroll - for (int shift = CU1DBLOCK / 2; shift > warpSize; shift >>= 1) { - if (tid < shift) { - smem[tid] = fmax(smem[tid], smem[tid + shift]); - } - __syncthreads(); - } - - // reduce to 1 element per row - if (tid < warpSize) { - for (int shift = warpSize; shift > 0; shift >>= 1) { - smem[tid] = fmax(smem[tid], smem[tid + shift]); - } + tmax = BlockReduceT(temp_storage).Reduce(tmax, cub::Max()); + if (tid == 0) { + smem[0] = tmax; } // broadcast max to all threads @@ -2762,23 +2706,9 @@ static void _log_softmax_reduce(Real* y, const Real* x, MatrixDim y_dim, for (int j = tid; j < y_dim.cols; j += CU1DBLOCK) { tsum += exp(x[x_start + j] - max); } - smem[tid] = tsum; - __syncthreads(); - - // reduce to 2x warpSize elements per row -# pragma unroll - for (int shift = CU1DBLOCK / 2; shift > warpSize; shift >>= 1) { - if (tid < shift) { - smem[tid] += smem[tid + shift]; - } - __syncthreads(); - } - - // reduce to 1 element per row - if (tid < warpSize) { - for (int shift = warpSize; shift > 0; shift >>= 1) { - smem[tid] += smem[tid + shift]; - } + tsum = BlockReduceT(temp_storage).Sum(tsum); + if (tid == 0) { + smem[0] = tsum; } // broadcast sum to all threads @@ -3024,6 +2954,9 @@ static void _diff_softmax(Real* x, const MatrixDim dim, const Real* value, const int value_stride, const Real* diff, const int diff_stride) { __shared__ Real ssum[CU1DBLOCK]; + typedef cub::BlockReduce BlockReduceT; + __shared__ typename BlockReduceT::TempStorage temp_storage; + const int tid = threadIdx.x; const int i = blockIdx.x; const int value_start = i * value_stride; @@ -3035,24 +2968,9 @@ static void _diff_softmax(Real* x, const MatrixDim dim, const Real* value, for (int j = tid; j < dim.cols; j += CU1DBLOCK) { tsum += value[value_start + j] * diff[diff_start + j]; } - ssum[tid] = tsum; - __syncthreads(); - - // Tree reduce to 2x warpSize elements. -# pragma unroll - for (int shift = CU1DBLOCK / 2; shift > warpSize; shift >>= 1) { - if (tid < shift) { - ssum[tid] += ssum[tid + shift]; - } - __syncthreads(); - } - - // Warp reduce to 1 element. Threads implicitly synchronized within a warp. 
- if (tid < warpSize) { -# pragma unroll - for (int shift = warpSize; shift > 0; shift >>= 1) { - ssum[tid] += ssum[tid + shift]; - } + tsum = BlockReduceT(temp_storage).Sum(tsum); + if (tid == 0) { + ssum[0] = tsum; } // Broadcast result to all threads @@ -3078,6 +2996,8 @@ static void _diff_log_softmax(const MatrixDim in_deriv_dim, Real* in_deriv) { __shared__ Real ssum[CU1DBLOCK]; + typedef cub::BlockReduce BlockReduceT; + __shared__ typename BlockReduceT::TempStorage temp_storage; const int tid = threadIdx.x; const int i = blockIdx.x; const int out_value_start = i * out_value_stride; @@ -3089,24 +3009,9 @@ static void _diff_log_softmax(const MatrixDim in_deriv_dim, for (int j = tid; j < in_deriv_dim.cols; j += CU1DBLOCK) { tsum += out_deriv[out_deriv_start + j]; } - ssum[tid] = tsum; - __syncthreads(); - - // Tree reduce to 2x warpSize elements. -# pragma unroll - for (int shift = CU1DBLOCK / 2; shift > warpSize; shift >>= 1) { - if (tid < shift) { - ssum[tid] += ssum[tid + shift]; - } - __syncthreads(); - } - - // Warp reduce to 1 element. Threads implicitly synchronized within a warp. - if (tid < warpSize) { -# pragma unroll - for (int shift = warpSize; shift > 0; shift >>= 1) { - ssum[tid] += ssum[tid + shift]; - } + tsum = BlockReduceT(temp_storage).Sum(tsum); + if (tid == 0) { + ssum[0] = tsum; } // Broadcast result to all threads From 1209c07da6676228afc12838235f7147be65df54 Mon Sep 17 00:00:00 2001 From: "kkm (aka Kirill Katsnelson)" Date: Sun, 10 Mar 2019 19:16:54 -0700 Subject: [PATCH 067/235] [src] Fix && and || uses where & and | intended, and other weird errors (#3087) --- src/bin/compute-wer-bootci.cc | 6 ++-- src/fstext/determinize-lattice-inl.h | 4 +-- src/fstext/lattice-weight.h | 3 +- src/gmm/mle-diag-gmm.h | 12 +++---- src/gmm/mle-full-gmm.h | 8 ++--- src/nnet2/combine-nnet-fast.cc | 2 +- src/nnet2/combine-nnet.cc | 48 +++++++++++++-------------- src/nnet3/nnet-analyze.cc | 2 +- src/nnet3/nnet-chain-training.cc | 4 +-- src/nnet3/nnet-simple-component.cc | 8 ++--- src/nnet3/nnet-training.cc | 4 +-- src/rnnlm/rnnlm-core-training.cc | 4 +-- src/rnnlm/rnnlm-embedding-training.cc | 12 +++---- src/tree/build-tree-questions.h | 6 ++-- src/util/kaldi-pipebuf.h | 3 +- 15 files changed, 63 insertions(+), 63 deletions(-) diff --git a/src/bin/compute-wer-bootci.cc b/src/bin/compute-wer-bootci.cc index b8b0697af75..ba2a4ce739c 100644 --- a/src/bin/compute-wer-bootci.cc +++ b/src/bin/compute-wer-bootci.cc @@ -162,7 +162,7 @@ int main(int argc, char *argv[]) { try { const char *usage = - "Compute a bootstrapping of WER to extract the 95\% confidence interval.\n" + "Compute a bootstrapping of WER to extract the 95% confidence interval.\n" "Take a reference and a transcription file, in integer or text format,\n" "and outputs overall WER statistics to standard output along with its\n" "confidence interval using the bootstrap method of Bisani and Ney.\n" @@ -234,12 +234,12 @@ int main(int argc, char *argv[]) { std::cout.precision(2); std::cerr.precision(2); std::cout << "Set1: %WER " << std::fixed << 100*mean_wer << - " 95\% Conf Interval [ " << 100*mean_wer-100*interval << + " 95% Conf Interval [ " << 100*mean_wer-100*interval << ", " << 100*mean_wer+100*interval << " ]" << '\n'; if(!hyp2_rspecifier.empty()) { std::cout << "Set2: %WER " << std::fixed << 100*mean_wer2 << - " 95\% Conf Interval [ " << 100*mean_wer2-100*interval2 << + " 95% Conf Interval [ " << 100*mean_wer2-100*interval2 << ", " << 100*mean_wer2+100*interval2 << " ]" << '\n'; std::cout << "Probability of Set2 improving 
Set1: " << std::fixed << diff --git a/src/fstext/determinize-lattice-inl.h b/src/fstext/determinize-lattice-inl.h index 43ad809f70e..775228bfd21 100644 --- a/src/fstext/determinize-lattice-inl.h +++ b/src/fstext/determinize-lattice-inl.h @@ -510,7 +510,7 @@ template class LatticeDeterminizer { if (!CheckMemoryUsage()) return false; } return (determinized_ = true); - } catch (std::bad_alloc) { + } catch (const std::bad_alloc &) { int32 repo_size = repository_.MemSize(), arcs_size = num_arcs_ * sizeof(TempArc), elems_size = num_elems_ * sizeof(Element), @@ -520,7 +520,7 @@ template class LatticeDeterminizer { << " (repo,arcs,elems) = (" << repo_size << "," << arcs_size << "," << elems_size << ")"; return (determinized_ = false); - } catch (std::runtime_error) { + } catch (const std::runtime_error &) { KALDI_WARN << "Caught exception doing lattice determinization"; return (determinized_ = false); } diff --git a/src/fstext/lattice-weight.h b/src/fstext/lattice-weight.h index af4826f7bed..86bec97d4e8 100644 --- a/src/fstext/lattice-weight.h +++ b/src/fstext/lattice-weight.h @@ -179,8 +179,7 @@ class LatticeWeightTpl { } else if (s == "-Infinity") { f = -numeric_limits::infinity(); } else if (s == "BadNumber") { - f = numeric_limits::infinity(); - f -= f; // get NaN + f = numeric_limits::quiet_NaN(); } else { char *p; f = strtod(s.c_str(), &p); diff --git a/src/gmm/mle-diag-gmm.h b/src/gmm/mle-diag-gmm.h index 24194ef886a..d41d36489bf 100644 --- a/src/gmm/mle-diag-gmm.h +++ b/src/gmm/mle-diag-gmm.h @@ -85,7 +85,7 @@ struct MapDiagGmmOptions { /// Tau value for the weights-- this tau value is applied /// per state, not per Gaussian. BaseFloat weight_tau; - + MapDiagGmmOptions(): mean_tau(10.0), variance_tau(50.0), weight_tau(10.0) { } @@ -150,8 +150,8 @@ class AccumDiagGmm { const MatrixBase &data, const VectorBase &frame_weights, int32 num_threads); - - + + /// Increment the stats for this component by the specified amount /// (not all parts may be taken, depending on flags). /// Note: x_stats and x2_stats are assumed to already be multiplied by "occ" @@ -162,7 +162,7 @@ class AccumDiagGmm { /// Increment with stats from this other accumulator (times scale) void Add(double scale, const AccumDiagGmm &acc); - + /// Smooths the accumulated counts by adding 'tau' extra frames. An example /// use for this is I-smoothing for MMIE. Calls SmoothWithAccum. void SmoothStats(BaseFloat tau); @@ -179,13 +179,13 @@ class AccumDiagGmm { void SmoothWithModel(BaseFloat tau, const DiagGmm &src_gmm); // Const accessors - const GmmFlagsType Flags() const { return flags_; } + GmmFlagsType Flags() const { return flags_; } const VectorBase &occupancy() const { return occupancy_; } const MatrixBase &mean_accumulator() const { return mean_accumulator_; } const MatrixBase &variance_accumulator() const { return variance_accumulator_; } // used in testing. - void AssertEqual(const AccumDiagGmm &other); + void AssertEqual(const AccumDiagGmm &other); private: int32 dim_; int32 num_comp_; diff --git a/src/gmm/mle-full-gmm.h b/src/gmm/mle-full-gmm.h index 6e770764e1e..618714b0e9b 100644 --- a/src/gmm/mle-full-gmm.h +++ b/src/gmm/mle-full-gmm.h @@ -1,7 +1,7 @@ // gmm/mle-full-gmm.h // Copyright 2009-2011 Jan Silovsky; Saarland University; -// Microsoft Corporation; +// Microsoft Corporation; // Univ. 
Erlangen Nuremberg, Korbinian Riedhammer // See ../../COPYING for clarification regarding multiple authors @@ -91,7 +91,7 @@ class AccumFullGmm { void Resize(int32 num_components, int32 dim, GmmFlagsType flags); /// Calls Resize with arguments based on gmm_ptr_ void Resize(const FullGmm &gmm, GmmFlagsType flags); - + void ResizeVarAccumulator(int32 num_comp, int32 dim); /// Returns the number of mixture components int32 NumGauss() const { return num_comp_; } @@ -122,8 +122,8 @@ class AccumFullGmm { const VectorBase &data, BaseFloat frame_posterior); - /// Accessors - const GmmFlagsType Flags() const { return flags_; } + /// Accessors + GmmFlagsType Flags() const { return flags_; } const Vector &occupancy() const { return occupancy_; } const Matrix &mean_accumulator() const { return mean_accumulator_; } const std::vector > &covariance_accumulator() const { return covariance_accumulator_; } diff --git a/src/nnet2/combine-nnet-fast.cc b/src/nnet2/combine-nnet-fast.cc index 02265a5f6ac..7ab2c9caf05 100644 --- a/src/nnet2/combine-nnet-fast.cc +++ b/src/nnet2/combine-nnet-fast.cc @@ -204,7 +204,7 @@ void FastNnetCombiner::CombineNnets(const Vector &scale_params, int32 num_nnets = nnets.size(); KALDI_ASSERT(num_nnets >= 1); int32 num_uc = nnets[0].NumUpdatableComponents(); - KALDI_ASSERT(num_nnets * nnets[0].NumUpdatableComponents()); + KALDI_ASSERT(nnets[0].NumUpdatableComponents() >= 1); *dest = nnets[0]; diff --git a/src/nnet2/combine-nnet.cc b/src/nnet2/combine-nnet.cc index 417db1b84c4..57cc6133c58 100644 --- a/src/nnet2/combine-nnet.cc +++ b/src/nnet2/combine-nnet.cc @@ -31,9 +31,9 @@ static void CombineNnets(const Vector &scale_params, int32 num_nnets = nnets.size(); KALDI_ASSERT(num_nnets >= 1); int32 num_uc = nnets[0].NumUpdatableComponents(); - KALDI_ASSERT(num_nnets * nnets[0].NumUpdatableComponents()); - - + KALDI_ASSERT(nnets[0].NumUpdatableComponents() >= 1); + + *dest = nnets[0]; SubVector scale_params0(scale_params, 0, num_uc); dest->ScaleComponents(scale_params0); @@ -59,7 +59,7 @@ static int32 GetInitialModel( for (int32 n = 0; n < num_nnets; n++) { BaseFloat objf = ComputeNnetObjf(nnets[n], validation_set, minibatch_size) / tot_frames; - + if (n == 0 || objf > best_objf) { best_objf = objf; best_n = n; @@ -98,7 +98,7 @@ static void GetInitialScaleParams( num_nnets = static_cast(nnets.size()); if (initial_model < 0 || initial_model > num_nnets) initial_model = GetInitialModel(validation_set, nnets); - + KALDI_ASSERT(initial_model >= 0 && initial_model <= num_nnets); int32 num_uc = nnets[0].NumUpdatableComponents(); @@ -107,7 +107,7 @@ static void GetInitialScaleParams( KALDI_LOG << "Initializing with neural net with index " << initial_model; // At this point we're using the best of the individual neural nets. scale_params->Set(0.0); - + // Set the block of parameters corresponding to the "best" of the // source neural nets to SubVector best_block(*scale_params, num_uc * initial_model, num_uc); @@ -129,14 +129,14 @@ static double ComputeObjfAndGradient( Vector *gradient) { Vector scale_params_float(scale_params); - + Nnet nnet_combined; CombineNnets(scale_params_float, nnets, &nnet_combined); - + Nnet nnet_gradient(nnet_combined); bool is_gradient = true; nnet_gradient.SetZero(is_gradient); - + // note: "ans" is normalized by the total weight of validation frames. 
int32 batch_size = 1024; double ans = ComputeNnetGradient(nnet_combined, @@ -146,7 +146,7 @@ static double ComputeObjfAndGradient( double tot_frames = validation_set.size(); if (gradient != NULL) { - int32 i = 0; // index into scale_params. + int32 i = 0; // index into scale_params. for (int32 n = 0; n < static_cast(nnets.size()); n++) { for (int32 j = 0; j < nnet_combined.NumComponents(); j++) { const UpdatableComponent *uc = @@ -155,7 +155,7 @@ static double ComputeObjfAndGradient( dynamic_cast(&(nnet_gradient.GetComponent(j))); if (uc != NULL) { double dotprod = uc->DotProduct(*uc_gradient) / tot_frames; - (*gradient)(i) = dotprod; + (*gradient)(i) = dotprod; i++; } } @@ -165,14 +165,14 @@ static double ComputeObjfAndGradient( if (debug) { KALDI_LOG << "Double-checking gradient computation"; - + Vector manual_gradient(scale_params.Dim()); for (int32 i = 0; i < scale_params.Dim(); i++) { double delta = 1.0e-04, fg = fabs((*gradient)(i)); if (fg < 1.0e-07) fg = 1.0e-07; if (fg * delta < 1.0e-05) delta = 1.0e-05 / fg; - + Vector scale_params_temp(scale_params); scale_params_temp(i) += delta; double new_ans = ComputeObjfAndGradient(validation_set, @@ -185,10 +185,10 @@ static double ComputeObjfAndGradient( KALDI_LOG << "Manually computed gradient is " << manual_gradient; KALDI_LOG << "Gradient we computed is " << *gradient; } - + return ans; } - + void CombineNnets(const NnetCombineConfig &combine_config, const std::vector &validation_set, @@ -205,7 +205,7 @@ void CombineNnets(const NnetCombineConfig &combine_config, int32 dim = scale_params.Dim(); KALDI_ASSERT(dim > 0); Vector gradient(dim); - + double objf, initial_objf; LbfgsOptions lbfgs_options; @@ -213,11 +213,11 @@ void CombineNnets(const NnetCombineConfig &combine_config, lbfgs_options.m = dim; // Store the same number of vectors as the dimension // itself, so this is BFGS. lbfgs_options.first_step_impr = combine_config.initial_impr; - + OptimizeLbfgs lbfgs(scale_params, lbfgs_options); - - for (int32 i = 0; i < combine_config.num_bfgs_iters; i++) { + + for (int32 i = 0; i < combine_config.num_bfgs_iters; i++) { scale_params.CopyFromVec(lbfgs.GetProposedValue()); objf = ComputeObjfAndGradient(validation_set, scale_params, @@ -227,9 +227,9 @@ void CombineNnets(const NnetCombineConfig &combine_config, KALDI_VLOG(2) << "Iteration " << i << " scale-params = " << scale_params << ", objf = " << objf << ", gradient = " << gradient; - + if (i == 0) initial_objf = objf; - + lbfgs.DoStep(objf, gradient); } @@ -244,10 +244,10 @@ void CombineNnets(const NnetCombineConfig &combine_config, nnets[0].NumUpdatableComponents()); scale_params_mat.CopyRowsFromVec(scale_params_float); KALDI_LOG << "Final scale factors are " << scale_params_mat; - + CombineNnets(scale_params_float, nnets, nnet_out); } - - + + } // namespace nnet2 } // namespace kaldi diff --git a/src/nnet3/nnet-analyze.cc b/src/nnet3/nnet-analyze.cc index 584a7c19ab8..a3696403eba 100644 --- a/src/nnet3/nnet-analyze.cc +++ b/src/nnet3/nnet-analyze.cc @@ -880,7 +880,7 @@ void ComputationChecker::CheckComputationIndexes() const { KALDI_ERR << "Backprop input needed but not supplied."; if ((properties & kBackpropNeedsOutput) && c.arg4 == 0) KALDI_ERR << "Backprop output needed but not supplied."; - if (c.arg6 == 0 && !(properties && kUpdatableComponent)) { + if (c.arg6 == 0 && !(properties & kUpdatableComponent)) { // note: we could perhaps make this just a warning, // or optimize it away somehow. 
KALDI_ERR << "Backprop is done but has no effect."; diff --git a/src/nnet3/nnet-chain-training.cc b/src/nnet3/nnet-chain-training.cc index a798cb597f5..cccb1110d3c 100644 --- a/src/nnet3/nnet-chain-training.cc +++ b/src/nnet3/nnet-chain-training.cc @@ -298,7 +298,7 @@ void NnetChainTrainer::PrintMaxChangeStats() const { (num_minibatches_processed_ * (nnet_config.backstitch_training_scale == 0.0 ? 1.0 : 1.0 + 1.0 / nnet_config.backstitch_training_interval)) - << " \% of the time."; + << " % of the time."; i++; } } @@ -308,7 +308,7 @@ void NnetChainTrainer::PrintMaxChangeStats() const { (num_minibatches_processed_ * (nnet_config.backstitch_training_scale == 0.0 ? 1.0 : 1.0 + 1.0 / nnet_config.backstitch_training_interval)) - << " \% of the time."; + << " % of the time."; } NnetChainTrainer::~NnetChainTrainer() { diff --git a/src/nnet3/nnet-simple-component.cc b/src/nnet3/nnet-simple-component.cc index e8c99494b06..32f49745c0c 100644 --- a/src/nnet3/nnet-simple-component.cc +++ b/src/nnet3/nnet-simple-component.cc @@ -4068,13 +4068,13 @@ bool CompositeComponent::IsUpdatable() const { int32 CompositeComponent::InputDim() const { KALDI_ASSERT(!components_.empty()); return components_.front()->InputDim(); -}; +} // virtual int32 CompositeComponent::OutputDim() const { KALDI_ASSERT(!components_.empty()); return components_.back()->OutputDim(); -}; +} // virtual int32 CompositeComponent::Properties() const { @@ -4096,7 +4096,7 @@ int32 CompositeComponent::Properties() const { if (last_component_properties & kStoresStats) ans |= kBackpropNeedsOutput; return ans; -}; +} MatrixStrideType CompositeComponent::GetStrideType(int32 i) const { @@ -4319,7 +4319,7 @@ void CompositeComponent::Backprop(const std::string &debug_info, // optimization; other propagates might also be skippable. int32 properties = components_[num_components - 2]->Properties(), next_properties = components_[num_components - 1]->Properties(); - if (!(properties & (kBackpropNeedsOutput || kUsesMemo)) && + if (!(properties & (kBackpropNeedsOutput | kUsesMemo)) && !(next_properties & kBackpropNeedsInput)) { num_components_to_propagate--; } diff --git a/src/nnet3/nnet-training.cc b/src/nnet3/nnet-training.cc index 0acaa5c2008..820644470c7 100644 --- a/src/nnet3/nnet-training.cc +++ b/src/nnet3/nnet-training.cc @@ -257,7 +257,7 @@ void NnetTrainer::PrintMaxChangeStats() const { (num_minibatches_processed_ * (config_.backstitch_training_scale == 0.0 ? 1.0 : 1.0 + 1.0 / config_.backstitch_training_interval)) - << " \% of the time."; + << " % of the time."; i++; } } @@ -267,7 +267,7 @@ void NnetTrainer::PrintMaxChangeStats() const { (num_minibatches_processed_ * (config_.backstitch_training_scale == 0.0 ? 1.0 : 1.0 + 1.0 / config_.backstitch_training_interval)) - << " \% of the time."; + << " % of the time."; } void ObjectiveFunctionInfo::UpdateStats( diff --git a/src/rnnlm/rnnlm-core-training.cc b/src/rnnlm/rnnlm-core-training.cc index 5a1ae97895f..d1a01f7ef66 100644 --- a/src/rnnlm/rnnlm-core-training.cc +++ b/src/rnnlm/rnnlm-core-training.cc @@ -302,7 +302,7 @@ void RnnlmCoreTrainer::PrintMaxChangeStats() const { << ", per-component max-change was enforced " << ((100.0 * num_max_change_per_component_applied_[i]) / num_minibatches_processed_) - << "\% of the time."; + << "% of the time."; i++; } } @@ -312,7 +312,7 @@ void RnnlmCoreTrainer::PrintMaxChangeStats() const { (num_minibatches_processed_ * (config_.backstitch_training_scale == 0.0 ? 
1.0 : 1.0 + 1.0 / config_.backstitch_training_interval)) - << "\% of the time."; + << "% of the time."; } void RnnlmCoreTrainer::ProcessOutput( diff --git a/src/rnnlm/rnnlm-embedding-training.cc b/src/rnnlm/rnnlm-embedding-training.cc index c4238c7356a..0b5916b6bba 100644 --- a/src/rnnlm/rnnlm-embedding-training.cc +++ b/src/rnnlm/rnnlm-embedding-training.cc @@ -117,9 +117,9 @@ void RnnlmEmbeddingTrainer::TrainBackstitch( bool is_backstitch_step1, CuMatrixBase *embedding_deriv) { - // backstitch training is incompatible with momentum > 0 + // backstitch training is incompatible with momentum > 0 KALDI_ASSERT(config_.momentum == 0.0); - + // If relevant, do the following: // "embedding_deriv += - 2 * l2_regularize * embedding_mat_" // This is an approximate to the regular l2 regularization (add l2 regularization @@ -130,7 +130,7 @@ void RnnlmEmbeddingTrainer::TrainBackstitch( embedding_deriv->AddMat(1.0 / (1.0 + config_.backstitch_training_scale) * l2_term, *embedding_mat_); } - } + } BaseFloat scale = 1.0; if (config_.use_natural_gradient) { @@ -213,7 +213,7 @@ void RnnlmEmbeddingTrainer::Train( } void RnnlmEmbeddingTrainer::TrainBackstitch( - bool is_backstitch_step1, + bool is_backstitch_step1, const CuArrayBase &active_words, CuMatrixBase *embedding_deriv) { @@ -232,7 +232,7 @@ void RnnlmEmbeddingTrainer::TrainBackstitch( embedding_deriv->AddRows(l2_term / (1.0 + config_.backstitch_training_scale), *embedding_mat_, active_words); } - } + } BaseFloat scale = 1.0; if (config_.use_natural_gradient) { if (is_backstitch_step1) preconditioner_.Freeze(true); @@ -273,7 +273,7 @@ void RnnlmEmbeddingTrainer::PrintStats() { (num_minibatches_ * (config_.backstitch_training_scale == 0.0 ? 1.0 : 1.0 + 1.0 / config_.backstitch_training_interval)) - << " \% of the time."; + << " % of the time."; Matrix delta_embedding_mat(*embedding_mat_); delta_embedding_mat.AddMat(-1.0, initial_embedding_mat_); diff --git a/src/tree/build-tree-questions.h b/src/tree/build-tree-questions.h index a6bcfdd500b..22f12d62912 100644 --- a/src/tree/build-tree-questions.h +++ b/src/tree/build-tree-questions.h @@ -52,7 +52,7 @@ struct QuestionsForKey { // Configuration class associated with a particular ke std::vector > initial_questions; RefineClustersOptions refine_opts; // if refine_opts.max_iter == 0, // we just pick from the initial questions. - + QuestionsForKey(int32 num_iters = 5): refine_opts(num_iters, 2) { // refine_cfg with 5 iters and top-n = 2 (this is no restriction because // RefineClusters called with 2 clusters; would get set to that anyway as @@ -102,7 +102,9 @@ class Questions { // careful, this is a class. 
KALDI_ASSERT(keys_out != NULL); CopyMapKeysToVector(key_idx_, keys_out); } - const bool HasQuestionsForKey(EventKeyType key) const { return (key_idx_.count(key) != 0); } + bool HasQuestionsForKey(EventKeyType key) const { + return (key_idx_.count(key) != 0); + } ~Questions() { kaldi::DeletePointers(&key_options_); } diff --git a/src/util/kaldi-pipebuf.h b/src/util/kaldi-pipebuf.h index 9b83cdccc3d..61034ac2757 100644 --- a/src/util/kaldi-pipebuf.h +++ b/src/util/kaldi-pipebuf.h @@ -82,7 +82,6 @@ class basic_pipebuf : public std::basic_filebuf { }; // class basic_pipebuf #endif // _MSC_VER -}; // namespace kaldi +} // namespace kaldi #endif // KALDI_UTIL_KALDI_PIPEBUF_H_ - From 5a5696ff3bc544911601f989fb1ec2793db7c395 Mon Sep 17 00:00:00 2001 From: "kkm (aka Kirill Katsnelson)" Date: Sun, 10 Mar 2019 19:53:09 -0700 Subject: [PATCH 068/235] [build] Some fixes to Makefiles (#3088) clang is unhappy with '-rdynamic' in compile-only step, and the switch is really unnecessary. Also, the default location for MKL 64-bit libraries is intel64/. The em64t/ was explained already obsolete by an Intel rep in 2010: https://software.intel.com/en-us/forums/intel-math-kernel-library/topic/285973 --- src/makefiles/cuda_64bit.mk | 4 ++-- src/makefiles/default_rules.mk | 2 +- src/makefiles/linux_x86_64_mkl.mk | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/makefiles/cuda_64bit.mk b/src/makefiles/cuda_64bit.mk index d66ae03602f..eb8cf743ab3 100644 --- a/src/makefiles/cuda_64bit.mk +++ b/src/makefiles/cuda_64bit.mk @@ -5,7 +5,7 @@ ifndef CUDATKDIR $(error CUDATKDIR not defined.) endif -CXXFLAGS += -DHAVE_CUDA -I$(CUDATKDIR)/include -fPIC -pthread -isystem $(OPENFSTINC) -rdynamic +CXXFLAGS += -DHAVE_CUDA -I$(CUDATKDIR)/include -fPIC -pthread -isystem $(OPENFSTINC) CUDA_INCLUDE= -I$(CUDATKDIR)/include -I$(CUBROOT) CUDA_FLAGS = --machine 64 -DHAVE_CUDA \ @@ -14,4 +14,4 @@ CUDA_FLAGS = --machine 64 -DHAVE_CUDA \ --verbose -Xcompiler "$(CXXFLAGS)" CUDA_LDFLAGS += -L$(CUDATKDIR)/lib64 -Wl,-rpath,$(CUDATKDIR)/lib64 -CUDA_LDLIBS += -lcublas -lcusparse -lcudart -lcurand -lnvToolsExt #LDLIBS : The libs are loaded later than static libs in implicit rule +CUDA_LDLIBS += -lcublas -lcusparse -lcudart -lcurand -lnvToolsExt #LDLIBS : The .so libs are loaded later than static libs in implicit rule diff --git a/src/makefiles/default_rules.mk b/src/makefiles/default_rules.mk index 25dafae2f3a..fcce90f5c21 100644 --- a/src/makefiles/default_rules.mk +++ b/src/makefiles/default_rules.mk @@ -125,7 +125,7 @@ valgrind: .valgrind #buid up dependency commands CC_SRCS=$(wildcard *.cc) #check if files exist to run dependency commands on -ifneq ($(CC_SRCS),) +ifneq ($(CC_SRCS),) CC_DEP_COMMAND=$(CXX) -M $(CXXFLAGS) $(CC_SRCS) endif diff --git a/src/makefiles/linux_x86_64_mkl.mk b/src/makefiles/linux_x86_64_mkl.mk index 7a70fa51a65..d1c399d9796 100644 --- a/src/makefiles/linux_x86_64_mkl.mk +++ b/src/makefiles/linux_x86_64_mkl.mk @@ -22,7 +22,7 @@ ifndef MKLROOT $(error MKLROOT not defined.) endif -MKLLIB ?= $(MKLROOT)/lib/em64t +MKLLIB ?= $(MKLROOT)/lib/intel64 CXXFLAGS = -std=c++11 -I.. 
-isystem $(OPENFSTINC) -O1 $(EXTRA_CXXFLAGS) \ -Wall -Wno-sign-compare -Wno-unused-local-typedefs \ From abd4869ca4f527b361e567c2eb29b06d0f3a6d9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20=C5=BBelasko?= Date: Mon, 11 Mar 2019 21:22:24 -0400 Subject: [PATCH 069/235] [src] Fixed -Wreordered warnings in feat (#3090) --- src/feat/feature-window.h | 5 +++-- src/feat/online-feature.cc | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/feat/feature-window.h b/src/feat/feature-window.h index 2fccaefb9a1..c9172521d7c 100644 --- a/src/feat/feature-window.h +++ b/src/feat/feature-window.h @@ -62,8 +62,9 @@ struct FrameExtractionOptions { blackman_coeff(0.42), snip_edges(true), allow_downsample(false), - max_feature_vectors(-1), - allow_upsample(false) { } + allow_upsample(false), + max_feature_vectors(-1) + { } void Register(OptionsItf *opts) { opts->Register("sample-frequency", &samp_freq, diff --git a/src/feat/online-feature.cc b/src/feat/online-feature.cc index 813e7b16f0c..90170a266e5 100644 --- a/src/feat/online-feature.cc +++ b/src/feat/online-feature.cc @@ -70,8 +70,8 @@ template OnlineGenericBaseFeature::OnlineGenericBaseFeature( const typename C::Options &opts): computer_(opts), window_function_(computer_.GetFrameOptions()), - input_finished_(false), waveform_offset_(0), - features_(opts.frame_opts.max_feature_vectors) { } + features_(opts.frame_opts.max_feature_vectors), + input_finished_(false), waveform_offset_(0) { } template void OnlineGenericBaseFeature::AcceptWaveform(BaseFloat sampling_rate, From 9c8ba0f873ab6ba5b19ee54d950fdd0460f5ead2 Mon Sep 17 00:00:00 2001 From: Ewald Enzinger Date: Mon, 11 Mar 2019 20:14:04 -0700 Subject: [PATCH 070/235] [egs] Replace bc with perl -e (#3093) --- .../v1/diarization/nnet3/xvector/extract_xvectors.sh | 2 +- egs/callhome_diarization/v1/run.sh | 2 +- egs/callhome_diarization/v2/run.sh | 2 +- egs/dihard_2018/v1/run.sh | 2 +- egs/dihard_2018/v2/run.sh | 2 +- egs/sre08/v1/local/score_sre08.sh | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/egs/callhome_diarization/v1/diarization/nnet3/xvector/extract_xvectors.sh b/egs/callhome_diarization/v1/diarization/nnet3/xvector/extract_xvectors.sh index d7591a6a3a8..8d579138c73 100755 --- a/egs/callhome_diarization/v1/diarization/nnet3/xvector/extract_xvectors.sh +++ b/egs/callhome_diarization/v1/diarization/nnet3/xvector/extract_xvectors.sh @@ -102,7 +102,7 @@ if [ $stage -le 0 ]; then fi utils/data/get_uniform_subsegments.py \ --max-segment-duration=$window \ - --overlap-duration=$(echo "$window-$period" | bc) \ + --overlap-duration=$(perl -e "print ($window-$period);") \ --max-remaining-duration=$min_segment \ --constant-duration=True \ $segments > $dir/subsegments diff --git a/egs/callhome_diarization/v1/run.sh b/egs/callhome_diarization/v1/run.sh index acc48bd24f9..f4652c0c0ef 100755 --- a/egs/callhome_diarization/v1/run.sh +++ b/egs/callhome_diarization/v1/run.sh @@ -188,7 +188,7 @@ if [ $stage -le 6 ]; then der=$(grep -oP 'DIARIZATION\ ERROR\ =\ \K[0-9]+([.][0-9]+)?' \ exp/tuning/${dataset}_t${threshold}) - if [ $(echo $der'<'$best_der | bc -l) -eq 1 ]; then + if [ $(perl -e "print ($der < $best_der ? 
1 : 0);") -eq 1 ]; then best_der=$der best_threshold=$threshold fi diff --git a/egs/callhome_diarization/v2/run.sh b/egs/callhome_diarization/v2/run.sh index ae05dd9da1c..b79717e2348 100755 --- a/egs/callhome_diarization/v2/run.sh +++ b/egs/callhome_diarization/v2/run.sh @@ -297,7 +297,7 @@ if [ $stage -le 10 ]; then der=$(grep -oP 'DIARIZATION\ ERROR\ =\ \K[0-9]+([.][0-9]+)?' \ $nnet_dir/tuning/${dataset}_t${threshold}) - if [ $(echo $der'<'$best_der | bc -l) -eq 1 ]; then + if [ $(perl -e "print ($der < $best_der ? 1 : 0);") -eq 1 ]; then best_der=$der best_threshold=$threshold fi diff --git a/egs/dihard_2018/v1/run.sh b/egs/dihard_2018/v1/run.sh index 429a1231975..44af9f48c3f 100755 --- a/egs/dihard_2018/v1/run.sh +++ b/egs/dihard_2018/v1/run.sh @@ -186,7 +186,7 @@ if [ $stage -le 7 ]; then der=$(grep -oP 'DIARIZATION\ ERROR\ =\ \K[0-9]+([.][0-9]+)?' \ $ivec_dir/tuning/dihard_2018_dev_t${threshold}) - if [ $(echo $der'<'$best_der | bc -l) -eq 1 ]; then + if [ $(perl -e "print ($der < $best_der ? 1 : 0);") -eq 1 ]; then best_der=$der best_threshold=$threshold fi diff --git a/egs/dihard_2018/v2/run.sh b/egs/dihard_2018/v2/run.sh index 1c018dfcc55..0da1f330ea7 100755 --- a/egs/dihard_2018/v2/run.sh +++ b/egs/dihard_2018/v2/run.sh @@ -260,7 +260,7 @@ if [ $stage -le 12 ]; then der=$(grep -oP 'DIARIZATION\ ERROR\ =\ \K[0-9]+([.][0-9]+)?' \ $nnet_dir/tuning/dihard_2018_dev_t${threshold}) - if [ $(echo $der'<'$best_der | bc -l) -eq 1 ]; then + if [ $(perl -e "print ($der < $best_der ? 1 : 0);") -eq 1 ]; then best_der=$der best_threshold=$threshold fi diff --git a/egs/sre08/v1/local/score_sre08.sh b/egs/sre08/v1/local/score_sre08.sh index 92831502f45..c1584946735 100755 --- a/egs/sre08/v1/local/score_sre08.sh +++ b/egs/sre08/v1/local/score_sre08.sh @@ -35,11 +35,11 @@ tot_eer=0.0 printf '% 12s' 'EER:' for condition in $(seq 8); do eer=$(awk '{print $3}' $scores | paste - $trials | awk -v c=$condition '{n=4+c; if ($n == "Y") print $1, $4}' | compute-eer - 2>/dev/null) - tot_eer=$(echo "$tot_eer+$eer" | bc) + tot_eer=$(perl -e "print ($tot_eer+$eer);") eers[$condition]=$eer done -eers[0]=$(echo "$tot_eer/8" | bc -l) +eers[0]=$(perl -e "print ($tot_eer/8.0);") for i in $(seq 0 8); do printf '% 7.2f' ${eers[$i]} From 8cbd582ef4550475ae33431eace5e7e59a2d8575 Mon Sep 17 00:00:00 2001 From: Nikhil M Date: Tue, 12 Mar 2019 09:58:28 +0530 Subject: [PATCH 071/235] [scripts] Fix python3 compatibility issue in data-perturbing script (#3084) --- egs/wsj/s5/local/chain/tuning/run_tdnn_1g.sh | 2 +- egs/wsj/s5/utils/data/perturb_data_dir_volume.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/egs/wsj/s5/local/chain/tuning/run_tdnn_1g.sh b/egs/wsj/s5/local/chain/tuning/run_tdnn_1g.sh index 526059b7b90..8f566ccfe6d 100755 --- a/egs/wsj/s5/local/chain/tuning/run_tdnn_1g.sh +++ b/egs/wsj/s5/local/chain/tuning/run_tdnn_1g.sh @@ -160,7 +160,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print(0.5/$xent_regularize)" | python) tdnn_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim-continuous=true" tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" diff --git a/egs/wsj/s5/utils/data/perturb_data_dir_volume.sh b/egs/wsj/s5/utils/data/perturb_data_dir_volume.sh index 
dae440b03a3..e357ba8cbfb 100755 --- a/egs/wsj/s5/utils/data/perturb_data_dir_volume.sh +++ b/egs/wsj/s5/utils/data/perturb_data_dir_volume.sh @@ -52,15 +52,15 @@ for line in sys.stdin.readlines(): parts = line.strip().split() if line.strip()[-1] == '|': if re.search('sox --vol', ' '.join(parts[-11:])): - print 'true' + print('true') sys.exit(0) elif re.search(':[0-9]+$', line.strip()) is not None: continue else: if ' '.join(parts[1:3]) == 'sox --vol': - print 'true' + print('true') sys.exit(0) -print 'false' +print('false') "` || exit 1 if $volume_perturb_done; then From 74356619b93ffceada30886bd2957dbfd7d0d853 Mon Sep 17 00:00:00 2001 From: csukuangfj <5284924+csukuangfj@users.noreply.github.com> Date: Wed, 13 Mar 2019 01:23:57 +0800 Subject: [PATCH 072/235] [doc] fix some typos in doc. (#3097) --- src/doc/io.dox | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/doc/io.dox b/src/doc/io.dox index dc958f57a6f..8f3a3cc05b6 100644 --- a/src/doc/io.dox +++ b/src/doc/io.dox @@ -383,7 +383,7 @@ namespace kaldi { std::string rspecifier2 = "ark:-"; // archive read from stdin. // write to a gzipped text archive. std::string wspecifier1 = "ark,t:| gzip -c > /some/dir/foo.ark.gz"; - std::string wspecifier2 = "ark,scp:data/my.ark,data/my.ark"; + std::string wspecifier2 = "ark,scp:data/my.ark,data/my.scp"; \endcode Usually, an rspecifier or wspecifier consists of a comma-separated, unordered @@ -401,7 +401,7 @@ namespace kaldi { \endverbatim This will write an archive, and a script file with lines like "utt_id /somedir/foo.ark:1234" that specify offsets into the - archive for more efficient random access. You can then do what you like which + archive for more efficient random access. You can then do whatever you like with the script file, including breaking it up into segments, and it will behave like any other script file. Note that although the order of options before the colon doesn't generally matter, in this particular case the "ark" must come before From 5bdea696359528278a0da7e9792315c1f4efb169 Mon Sep 17 00:00:00 2001 From: "kkm (aka Kirill Katsnelson)" Date: Tue, 12 Mar 2019 10:27:35 -0700 Subject: [PATCH 073/235] [build] Make sure expf() speed probe times sensibly (#3089) --- src/probe/README.slow_expf | 11 +++++--- src/probe/exp-test.cc | 51 +++++++++++++++++++++++++------------- 2 files changed, 41 insertions(+), 21 deletions(-) diff --git a/src/probe/README.slow_expf b/src/probe/README.slow_expf index 00c9ce5be09..c20386b8137 100644 --- a/src/probe/README.slow_expf +++ b/src/probe/README.slow_expf @@ -1,5 +1,6 @@ -On some machines, expf() turns out to be very slow: much slower than its double precision counterpart exp(). -Probably this is concerned with the version of glibc. +On some machines, expf() turns out to be very slow: much slower than its double +precision counterpart exp(). Probably this is concerned with the version of +glibc. Here are a couple of examples: @@ -21,5 +22,7 @@ configuration$ ./exp-test exp() time: 0.0028439 expf() time: 0.00713329 -If slow behaviour is detected, then KALDI_NO_EXPF macro will be used, and the Exp() wrapper in base/kaldi-math.h will use exp() even for single precision floats. -The behaviour of expf() is considered to be slow if it is slower than exp() by at least 10%. \ No newline at end of file +If slow behaviour is detected, then KALDI_NO_EXPF macro will be used, and the +Exp() wrapper in base/kaldi-math.h will use exp() even for single precision +floats. 
The behaviour of expf() is considered to be slow if it is slower than +exp() by at least 10%. diff --git a/src/probe/exp-test.cc b/src/probe/exp-test.cc index 1fd8a64c6a6..d6cc76d4ce2 100644 --- a/src/probe/exp-test.cc +++ b/src/probe/exp-test.cc @@ -17,35 +17,52 @@ // See the Apache 2 License for the specific language governing permissions and // limitations under the License. +// Read Makefile.slow_expf. This test must be compiled with -O0. + #include #include #include "base/timer.h" -#define SAMPLE 100000 +int main() { + int test_iter = 300000; + + // Make sure that the CPU bumps its clock to full speed: run the first loop + // without timing. Then increase the sample iteration count exponentially + // until the loop takes at least 10ms. We run this loop 1/4 of the number of + // actual test iterations and call both exp() and expf(), so that the overall + // test run will take 20 to 60 ms, to ensure a sensibly measurable result. + for (bool first = true; ; first=false) { + kaldi::Timer timer; + for(int i = 0; i < test_iter; i += 4) { + (void)exp((double)(i & 0x0F)); + (void)expf((double)(i & 0x0F)); + } + double time = timer.Elapsed(); + if (first) continue; + if (time > 0.01) break; + test_iter *= 3; + } -int main() { - float dummy = 0.0; kaldi::Timer exp_timer; - for(int i = 0; i < SAMPLE; ++i) { - dummy += exp((double)(i % 10)); + for(int i = 0; i < test_iter; ++i) { + (void)exp((double)(i & 0x0F)); } double exp_time = exp_timer.Elapsed(); kaldi::Timer expf_timer; - for(int i = 0; i < SAMPLE; ++i) { - dummy += expf((double)(i % 10)); + for(int i = 0; i < test_iter; ++i) { + (void)expf((double)(i & 0x0F)); } double expf_time = expf_timer.Elapsed(); - - // Often exp() and expf() perform very similarly, - // so we will replace expf() by exp() only if there is at least 10% difference - if (expf_time < exp_time * 1.1) { + + double ratio = expf_time / exp_time; + if (ratio < 1.1) { + // Often exp() and expf() perform very similarly, so we will replace expf() + // by exp() only if there is at least 10% difference. return 0; - } else { - std::cerr << "exp() time: " << exp_time << std::endl; - std::cerr << "expf() time: " << expf_time << std::endl; - return 1; } - - std::cerr << dummy << std::endl; // No complaint about the unused variable + + std::cerr << ("WARNING: slow expf() detected. expf() is slower than exp() " + "by the factor of ") << ratio << "\n"; + return 1; } From b7a4feccdac3a06fdb5370cbe25aa5cd42062b33 Mon Sep 17 00:00:00 2001 From: Syun Date: Wed, 13 Mar 2019 02:30:11 +0800 Subject: [PATCH 074/235] [scripts] Make sure merge_targets.py works in python3 (#3094) --- egs/wsj/s5/steps/segmentation/internal/merge_targets.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/egs/wsj/s5/steps/segmentation/internal/merge_targets.py b/egs/wsj/s5/steps/segmentation/internal/merge_targets.py index a14aef151c2..84b0c884f45 100755 --- a/egs/wsj/s5/steps/segmentation/internal/merge_targets.py +++ b/egs/wsj/s5/steps/segmentation/internal/merge_targets.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Copyright 2017 Vimal Manohar # Apache 2.0 @@ -16,8 +16,6 @@ option. 
""" -from __future__ import print_function -from __future__ import division import argparse import logging import numpy as np @@ -111,7 +109,7 @@ def should_remove_frame(row, dim): # source[2] = [ 0 0 0 ] """ assert len(row) % dim == 0 - num_sources = len(row) / dim + num_sources = len(row) // dim max_idx = np.argmax(row) max_val = row[max_idx] From 94475d6994e581877d87684d731406a1759ebcff Mon Sep 17 00:00:00 2001 From: Desh Raj Date: Wed, 13 Mar 2019 12:21:20 -0400 Subject: [PATCH 075/235] [src] ifdef to fix compilation failure on CUDA 8 and earlier (#3103) --- src/cudamatrix/cu-device.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cudamatrix/cu-device.cc b/src/cudamatrix/cu-device.cc index 140275d3b6e..85c2492c074 100644 --- a/src/cudamatrix/cu-device.cc +++ b/src/cudamatrix/cu-device.cc @@ -111,12 +111,14 @@ void CuDevice::Initialize() { CUBLAS_SAFE_CALL(cublasCreate(&cublas_handle_)); CUBLAS_SAFE_CALL(cublasSetStream(cublas_handle_, cudaStreamPerThread)); + #if CUDA_VERSION >= 9000 if (device_options_.use_tensor_cores) { // Enable tensor cores in CUBLAS // Note if the device does not support tensor cores this will fall back to normal math mode CUBLAS_SAFE_CALL(cublasSetMathMode(cublas_handle_, CUBLAS_TENSOR_OP_MATH)); } + #endif // Initialize the cuSPARSE library CUSPARSE_SAFE_CALL(cusparseCreate(&cusparse_handle_)); From fc8c17b18f4c4224f70b09086a3e69df67e93674 Mon Sep 17 00:00:00 2001 From: csukuangfj <5284924+csukuangfj@users.noreply.github.com> Date: Thu, 14 Mar 2019 01:07:52 +0800 Subject: [PATCH 076/235] [doc] fix typos and broken links in doc. (#3102) --- egs/rm/README.txt | 2 +- src/doc/data_prep.dox | 2 +- src/doc/dependencies.dox | 2 +- src/doc/dnn.dox | 2 +- src/doc/kaldi_for_dummies.dox | 32 ++++++++++++++++---------------- src/doc/tutorial_looking.dox | 4 ++-- src/doc/tutorial_prereqs.dox | 2 +- src/doc/tutorial_running.dox | 10 +++++----- src/doc/versions.dox | 4 ++-- 9 files changed, 30 insertions(+), 30 deletions(-) diff --git a/egs/rm/README.txt b/egs/rm/README.txt index ed588e481c6..4fa3d7c87e8 100644 --- a/egs/rm/README.txt +++ b/egs/rm/README.txt @@ -9,7 +9,7 @@ About the Resource Management corpus: Each subdirectory of this directory contains the scripts for a sequence of experiments. -s5 is the currently recommmended setup. +s5 is the currently recommended setup. s5: This is the "new-new-style" recipe. It is now finished. All further work will be on top of this style of recipe. Note: diff --git a/src/doc/data_prep.dox b/src/doc/data_prep.dox index d8fe1746df1..e81032537cc 100644 --- a/src/doc/data_prep.dox +++ b/src/doc/data_prep.dox @@ -191,7 +191,7 @@ the speaker identities, you can just make the speaker-ids the same as the uttera so the format of the file would be just \ \. We have made the previous sentence bold because we have encountered people creating a "global" speaker-id. This is a bad idea because it makes cepstral mean normalization -ineffective in traning (since it's applied globally), and because it will create problems +ineffective in training (since it's applied globally), and because it will create problems when you use utils/split_data_dir.sh to split your data into pieces. There is another file that exists in some setups; it is used only occasionally and diff --git a/src/doc/dependencies.dox b/src/doc/dependencies.dox index 63d2658b726..d8a5591955f 100644 --- a/src/doc/dependencies.dox +++ b/src/doc/dependencies.dox @@ -113,7 +113,7 @@ - CLAPACK, the linear algebra library (we download the headers). 
This is useful only on systems where you don't have ATLAS and are instead compiling with CLAPACK. - - OpenBLAS: this is an alernative to ATLAS or CLAPACK. The scripts don't + - OpenBLAS: this is an alternative to ATLAS or CLAPACK. The scripts don't use it by default but we provide installation scripts so you can install it if you want to compare it against ATLAS (it's more actively maintained than ATLAS). diff --git a/src/doc/dnn.dox b/src/doc/dnn.dox index 5b3d2b98261..bab4658e552 100644 --- a/src/doc/dnn.dox +++ b/src/doc/dnn.dox @@ -37,7 +37,7 @@ namespace kaldi { We currently have three separate codebases for deep neural nets in Kaldi. All are still active in the sense that the up-to-date recipes refer to all of them. The first one ("nnet1"( is located in code subdirectories nnet/ and - nnetbin/, and is primiarly maintained by Karel Vesely. The second is located + nnetbin/, and is primarily maintained by Karel Vesely. The second is located in code subdirectories nnet2/ and nnet2bin/, and is primarily maintained by Daniel Povey (this code was originally based on an earlier version of Karel's code, but it has been extensively rewritten). The third is located diff --git a/src/doc/kaldi_for_dummies.dox b/src/doc/kaldi_for_dummies.dox index d712ab87af9..b48d6dd8dac 100644 --- a/src/doc/kaldi_for_dummies.dox +++ b/src/doc/kaldi_for_dummies.dox @@ -71,7 +71,7 @@ and installation, - \c awk – programming language, used for searching and processing patterns in files and data streams, - \c bash – Unix shell and script programming language, - - \c grep – command-line utility for searching plain-text data sets for lines + - \c grep – command-line utility for searching plain-text datasets for lines matching a regular expression, - \c make – automatically builds executable programs and libraries from source code, @@ -96,7 +96,7 @@ be nice if you read any \c README files you find. \c kaldi - main Kaldi directory which contains: - \c egs – example scripts allowing you to quickly build ASR -systems for over 30 popular speech corporas (documentation is attached for each +systems for over 30 popular speech corpora (documentation is attached for each project), - \c misc – additional tools and supplies, not needed for proper Kaldi functionality, @@ -136,34 +136,34 @@ the stuff related to your project. I assume that you want to set up an ASR system, basing on your own audio data. For example - let it be a set of 100 files. File format is WAV. Each file -contains 3 spoken digits recorded in english language, one by one. Each of +contains 3 spoken digits recorded in English language, one by one. Each of these audio files is named in a recognizable way (e.g. \c 1_5_6.wav, which in my pattern means that the spoken sentence is 'one, five, six') and placed in the recognizable folder representing particular speaker during a particular recording session (there may be a situation that you have recordings of the same person but in two different quality/noise environments - put these -in separate folders). So to sum up, my exemplary data set looks like this: +in separate folders). 
So to sum up, my exemplary dataset looks like this: - 10 different speakers (ASR systems must be trained and tested on different speakers, the more speakers you have the better), - each speaker says 10 sentences, - - 100 senteces/utterances (in 100 *.wav files placed in 10 folders related to + - 100 sentences/utterances (in 100 *.wav files placed in 10 folders related to particular speakers - 10 *.wav files in each folder), - 300 words (digits from zero to nine), - each sentence/utterance consist of 3 words. -Whatever your first data set is, adjust my example to your particular case. Be -careful with big data sets and complex grammars - start with something simple. +Whatever your first dataset is, adjust my example to your particular case. Be +careful with big datasets and complex grammars - start with something simple. Sentences that contain only digits are perfect in this case.

Task

Go to \c kaldi/egs/digits directory and create \c digits_audio folder. In \c kaldi/egs/digits/digits_audio create two folders: \c train and \c test. Select one speaker -of your choice to represent testing data set. Use this speaker's 'speakerID' as +of your choice to represent testing dataset. Use this speaker's 'speakerID' as a name for an another new folder in \c kaldi/egs/digits/digits_audio/test directory. Then put there all the audio files related to that person. Put the rest (9 speakers) into \c train folder - this will be your training -data set. Also create subfolders for each speaker. +dataset. Also create subfolders for each speaker. \subsection kaldi_for_dummies_acoustic Acoustic data @@ -174,14 +174,14 @@ section as well) can be considered as a text file with some number of strings (each string in a new line). These strings need to be sorted. If you will encounter any sorting issues you can use Kaldi scripts for checking (\c utils/validate_data_dir.sh) and fixing (\c utils/fix_data_dir.sh) data order. -And for you information - \c utils directory will be attached to your project in +And for your information - \c utils directory will be attached to your project in \ref kaldi_for_dummies_tools "Tools attachment" section.

Task

In \c kaldi/egs/digits directory, create a folder \c data. Then create \c test and \c train subfolders inside. Create in each subfolder following files (so you have files named in the same way in \c test and \c train subfolders -but they relate to two different data sets that you created before): +but they relate to two different datasets that you created before): a.) \c spk2gender
This file informs about speakers gender. As we assumed, 'speakerID' is a unique @@ -252,7 +252,7 @@ four four two \subsection kaldi_for_dummies_language Language data -This section relates to language modelling files that also need to be considered +This section relates to language modeling files that also need to be considered as 'must be done'. Look for the syntax details here: \ref data_prep (each file is precisely described). Also feel free to read some examples in other \c egs scripts. Now is the perfect time. @@ -395,7 +395,7 @@ decided to use two different training methods: - TRI1 - simple triphone training (first triphone pass). These two methods are enough to show noticable differences in decoding results -using only digits lexicon and small training data set. +using only digits lexicon and small training dataset.
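(Editor's aside on the two training methods mentioned just above: in a typical Kaldi run.sh these two stages look roughly like the sketch below. This is only an illustration, not taken from this patch; the job count, leaf/Gaussian counts and experiment directory names are placeholder values for a tiny digits task.)

  # monophone training (MONO), alignment, then a first triphone pass (TRI1)
  steps/train_mono.sh --nj 1 --cmd "$train_cmd" data/train data/lang exp/mono
  steps/align_si.sh --nj 1 --cmd "$train_cmd" data/train data/lang exp/mono exp/mono_ali
  # 2000 tree leaves and 11000 Gaussians are illustrative numbers only
  steps/train_deltas.sh --cmd "$train_cmd" 2000 11000 data/train data/lang exp/mono_ali exp/tri1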

Task

In \c kaldi/egs/digits directory create 3 scripts: @@ -432,7 +432,7 @@ c.) \c run.sh . ./path.sh || exit 1 . ./cmd.sh || exit 1 -nj=1 # number of parallel jobs - 1 is perfect for such a small data set +nj=1 # number of parallel jobs - 1 is perfect for such a small dataset lm_order=1 # language model order (n-gram quantity) - 1 is enough for digits grammar # Safety mechanism (possible running this script with modified arguments) @@ -575,7 +575,7 @@ folder (same directory). This is just an example. The point of this short tutorial is to show you how to create 'anything' in Kaldi and to get a better understanding of how to think while using this toolkit. Personally I started with looking for tutorials made -by the Kaldi authors/developers. After succesful Kaldi installation I launched +by the Kaldi authors/developers. After successful Kaldi installation I launched some example scripts (Yesno, Voxforge, LibriSpeech - they are relatively easy and have free acoustic/language data to download - I used these three as a base for my own scripts). @@ -586,7 +586,7 @@ There are two very useful sections for beginners inside:
a.) \ref tutorial - almost 'step by step' tutorial on how to set up an ASR system; up to some point this can be done without RM dataset. It is good to read it,
-b.) \ref data_prep - very detailed explaination of how to use your own data +b.) \ref data_prep - very detailed explanation of how to use your own data in Kaldi. More useful links about Kaldi I found:
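(Editor's aside to go with the data_prep pointer above: once the hand-made files such as spk2gender, wav.scp, text and utt2spk are in place, it is worth sanity-checking them with the two utility scripts named earlier in this tutorial. A minimal sketch, assuming a hypothetical data/train directory:)

  # report sorting/format problems in the hand-made directory (skip feature checks)
  utils/validate_data_dir.sh --no-feats data/train
  # sort the files and drop entries that are inconsistent across them
  utils/fix_data_dir.sh data/train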
diff --git a/src/doc/tutorial_looking.dox b/src/doc/tutorial_looking.dox index 420abfc9bce..831d721c7eb 100644 --- a/src/doc/tutorial_looking.dox +++ b/src/doc/tutorial_looking.dox @@ -171,7 +171,7 @@ making sure have their normal values, begin with KALDI_. This is a precaution to avoid future conflicts with other codebases (since \#defines don't limit themselves to the kaldi namespace). Notice the style of the function names: LikeThis(). Our style is generally based on - this one , + this one , to conform with OpenFst, but there are some differences. To see other elements of the style, which will help you to understand Kaldi @@ -190,7 +190,7 @@ It prints out the usage, which should give you a generic idea of how Kaldi progr are called. Note that while there is a --config option that can be used to pass a configuration file, in general Kaldi is not as config-driven as HTK and these files are not widely used. You will see a --binary option. In general, Kaldi file -formats come in both binary and test forms, and the --binary option controls how +formats come in both binary and text forms, and the --binary option controls how they are written. However, this only controls how single objects (e.g. acoustic models) are written. For whole collections of objects (e.g. collections of feature files), there is a different mechanism that we will come to later. diff --git a/src/doc/tutorial_prereqs.dox b/src/doc/tutorial_prereqs.dox index 82079a281b9..72b1fcf8ad8 100644 --- a/src/doc/tutorial_prereqs.dox +++ b/src/doc/tutorial_prereqs.dox @@ -51,7 +51,7 @@ The most difficult part of the installation process relates to the math library ATLAS; if this is not already installed as a library on your system you will have to compile it, and this requires that CPU throttling be turned off, which - may require root priveleges. We provide scripts and detailed instructions for + may require root privileges. We provide scripts and detailed instructions for all installation steps. When scripts fail, read the output carefully because it tries to provide guidance as to how to fix problems. Please inform us if there are problems at any point, however minor; see \ref other. diff --git a/src/doc/tutorial_running.dox b/src/doc/tutorial_running.dox index f977348a3cb..d639cd4e664 100644 --- a/src/doc/tutorial_running.dox +++ b/src/doc/tutorial_running.dox @@ -115,14 +115,14 @@ Now go back to the data directory and change directory to /train. Then execute t \verbatim head text -head spk2gender.map +head spk2gender head spk2utt head utt2spk head wav.scp \endverbatim - text - This file contains mappings between utterances and utterance ids which will be used by Kaldi. This file will be turned into an integer format-- still a text file, but with the words replaced with integers. -- spk2gender.map - This file contains mappings between speakers and their gender. This also acts as a list of unique users involved in training. +- spk2gender - This file contains mappings between speakers and their gender. This also acts as a list of unique users involved in training. - spk2utt - This is a mapping between the speaker identifiers and all the utterance identifiers associated with the speaker. - utt2spk - This is a one-to-one mapping between utterance ids and the corresponding speaker identifiers. - wav.scp - This file is actually read directly by Kaldi programs when doing feature extraction. Look at the file again. It is parsed as a set of key-value pairs, where the key is the first string on each line. 
The value is a kind of "extended filename", and you can guess how it works. Since it is for reading we will refer to this type of string as an "rxfilename" (for writing we use the term wxfilename). See \ref io_sec_xfilename if you are curious. Note that although we use the extension .scp, this is not a script file in the HTK sense (i.e. it is not viewed as an extension to the command-line arguments). @@ -383,7 +383,7 @@ do copy-tree --binary=false exp/mono/tree - | less \endverbatim Note that this is a monophone "tree" so it is very trivial-- it -does not have any "splits". Although this tree format was not indended to be +does not have any "splits". Although this tree format was not intended to be very human-readable, we have received a number of queries about the tree format so we will explain it. The rest of this paragraph can be skipped over by the casual reader. After "ToPdf", the tree file contains an object of the @@ -442,7 +442,7 @@ Type \verbatim grep Overall exp/mono/log/acc.{?,??}.{?,??}.log \endverbatim -You can see the acoustic likelihods on each iteration. Next look at one of the files +You can see the acoustic likelihoods on each iteration. Next look at one of the files exp/mono/log/update.*.log to see what kind of information is in the update log. When the monophone training is finished, we can test the monophone decoding. Before decoding, we have to create the decode graph. Type: @@ -505,7 +505,7 @@ gmm-decode-faster \endverbatim to see the usage message, and match up the arguments with what you see in the log file. Recall that "rspecifier" is one of those strings that specifies how to read a table, -and "wspecifier" specifies how to write one. Look carefuly at these arguments and try +and "wspecifier" specifies how to write one. Look carefully at these arguments and try to figure out what they mean. Look at the rspecifier that corresponds to the features, and try to understand it (this one has spaces inside, so Kaldi prints it out with single quotes around it so that you could paste it into the shell and the program would run as intended). diff --git a/src/doc/versions.dox b/src/doc/versions.dox index b26978b6e4d..08e2c2bbda7 100644 --- a/src/doc/versions.dox +++ b/src/doc/versions.dox @@ -28,7 +28,7 @@ \section versions_scheme Versioning scheme - During its lifetime, Kaldi has has three different versioning methods. + During its lifetime, Kaldi has three different versioning methods. Originally Kaldi was a subversion (svn)-based project, and was hosted on Sourceforge. Then Kaldi was moved to github, and for some time the only version-number available was the git hash of the commit. @@ -121,7 +121,7 @@ - Create a nnet3-based setup for RNN language models (i.e. recurrent and neural net based language models) - Some extentions to the core of the nnet3 framework to support constant values and - scalar multiplication without dedicated compoennts. + scalar multiplication without dedicated components. Below are commits corresponding to minor version numbers 5.3.x. 
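(Editor's aside on the specifier examples touched by the doc fixes above: the corrected "ark,scp:" wspecifier writes one archive plus an index that can later be re-read like any other script file, and a wav.scp value is an "extended filename" that may be a plain path or a command pipe. A minimal sketch with made-up paths and utterance ids, assuming standard Kaldi binaries such as copy-feats are on the PATH:)

  # write features once, producing both the archive and its index ('ark' before 'scp')
  copy-feats scp:data/train/feats.scp ark,scp:data/my.ark,data/my.scp
  # data/my.scp now holds lines such as "utt_id data/my.ark:1234" and can be
  # split or reordered, then read back as an ordinary scp rspecifier:
  copy-feats scp:data/my.scp ark,t:- | head
  # wav.scp entries are extended filenames: a plain path, or a pipe producing wav data
  #   utt_001 /data/audio/utt_001.wav
  #   utt_002 sox /data/audio/utt_002.flac -t wav - |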
From 3f8b6b29535930432cba50637bfd4921e514ae5b Mon Sep 17 00:00:00 2001 From: Yiming Wang Date: Wed, 13 Mar 2019 17:04:27 -0400 Subject: [PATCH 077/235] [scripts] Fix frame_shift bug in egs/swbd/s5c/local/score_sclite_conf.sh (#3104) --- egs/swbd/s5c/local/score_sclite_conf.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/egs/swbd/s5c/local/score_sclite_conf.sh b/egs/swbd/s5c/local/score_sclite_conf.sh index 9a1fa5083bf..21da4520a4d 100755 --- a/egs/swbd/s5c/local/score_sclite_conf.sh +++ b/egs/swbd/s5c/local/score_sclite_conf.sh @@ -39,6 +39,12 @@ for f in $data/stm $data/glm $lang/words.txt $lang/phones/word_boundary.int \ [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; done +if [ -f $dir/../frame_subsampling_factor ]; then + factor=$(cat $dir/../frame_subsampling_factor) || exit 1 + frame_shift_opt="--frame-shift=0.0$factor" + echo "$0: $dir/../frame_subsampling_factor exists, using $frame_shift_opt" +fi + name=`basename $data`; # e.g. eval2000 mkdir -p $dir/scoring/log @@ -51,7 +57,7 @@ if [ $stage -le 0 ]; then ACWT=\`perl -e \"print 1.0/LMWT\;\"\` '&&' \ lattice-add-penalty --word-ins-penalty=$wip "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \ - lattice-to-ctm-conf --decode-mbr=$decode_mbr --acoustic-scale=\$ACWT ark:- - \| \ + lattice-to-ctm-conf $frame_shift_opt --decode-mbr=$decode_mbr --acoustic-scale=\$ACWT ark:- - \| \ utils/int2sym.pl -f 5 $lang/words.txt \| \ utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ '>' $dir/score_LMWT_${wip}/$name.ctm || exit 1; From 633e61c0492a73a9a829cb37ea1f8629c9efcd64 Mon Sep 17 00:00:00 2001 From: Martin Kocour Date: Thu, 14 Mar 2019 02:43:08 +0100 Subject: [PATCH 078/235] [src] Fix wrong assertion failure in nnet3-am-compute (#3106) --- src/nnet2bin/nnet-am-compute.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nnet2bin/nnet-am-compute.cc b/src/nnet2bin/nnet-am-compute.cc index 32da30b73a5..fe07f9c6a25 100644 --- a/src/nnet2bin/nnet-am-compute.cc +++ b/src/nnet2bin/nnet-am-compute.cc @@ -94,7 +94,7 @@ int main(int argc, char *argv[]) { int64 num_done = 0, num_frames = 0; Vector inv_priors(am_nnet.Priors()); - KALDI_ASSERT(inv_priors.Dim() == am_nnet.NumPdfs() && + KALDI_ASSERT(!divide_by_priors || inv_priors.Dim() == am_nnet.NumPdfs() && "Priors in neural network not set up."); inv_priors.ApplyPow(-1.0); From 8cafd32bdc1bb4d2586c99b1234b1f13f3d365b5 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Wed, 13 Mar 2019 22:15:36 -0400 Subject: [PATCH 079/235] [src] Cosmetic changes to natural-gradient code (#3108) --- src/nnet3/natural-gradient-online.cc | 24 ++++++++++++------------ src/nnet3/natural-gradient-online.h | 8 ++++---- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/nnet3/natural-gradient-online.cc b/src/nnet3/natural-gradient-online.cc index 0677e1ca474..a205490ee3f 100644 --- a/src/nnet3/natural-gradient-online.cc +++ b/src/nnet3/natural-gradient-online.cc @@ -119,14 +119,14 @@ void OnlineNaturalGradient::InitDefault(int32 D) { t_ = 0; } -void OnlineNaturalGradient::Init(const CuMatrixBase &R0) { - int32 D = R0.NumCols(); +void OnlineNaturalGradient::Init(const CuMatrixBase &X0) { + int32 D = X0.NumCols(); // for locking reasons it's better to use a different object. OnlineNaturalGradient this_copy(*this); this_copy.InitDefault(D); this_copy.t_ = 1; // Prevent recursion to Init() again. 
- CuMatrix R0_copy(R0.NumRows(), R0.NumCols(), kUndefined); + CuMatrix X0_copy(X0.NumRows(), X0.NumCols(), kUndefined); // 'num_iters' is number of iterations with the same data from a pseudorandom // start. this is a faster way of starting than doing eigenvalue // decomposition. @@ -134,11 +134,11 @@ void OnlineNaturalGradient::Init(const CuMatrixBase &R0) { // Note: we only do three iterations of initialization if we have enough data // that it's reasonably possible to estimate the subspace of dimension // this_copy.rank_. If we don't have more than that many rows in our initial - // minibatch R0, we just do one iteration... this gives us almost exactly - // (barring small effects due to epsilon_ > 0) the row subspace of R0 after + // minibatch X0, we just do one iteration... this gives us almost exactly + // (barring small effects due to epsilon_ > 0) the row subspace of X0 after // one iteration anyway. int32 num_init_iters; - if (R0.NumRows() <= this_copy.rank_) + if (X0.NumRows() <= this_copy.rank_) num_init_iters = 1; else num_init_iters = 3; @@ -147,8 +147,8 @@ void OnlineNaturalGradient::Init(const CuMatrixBase &R0) { // initialize. for (int32 i = 0; i < num_init_iters; i++) { BaseFloat scale; - R0_copy.CopyFromMat(R0); - this_copy.PreconditionDirections(&R0_copy, &scale); + X0_copy.CopyFromMat(X0); + this_copy.PreconditionDirections(&X0_copy, &scale); } rank_ = this_copy.rank_; W_t_.Swap(&this_copy.W_t_); @@ -197,7 +197,7 @@ void OnlineNaturalGradient::PreconditionDirections( t_ += 1; } -void OnlineNaturalGradient::ReorthogonalizeXt1( +void OnlineNaturalGradient::ReorthogonalizeRt1( const VectorBase &d_t1, BaseFloat rho_t1, CuMatrixBase *W_t1, @@ -214,7 +214,7 @@ void OnlineNaturalGradient::ReorthogonalizeXt1( ComputeEt(d_t1, beta_t1, &e_t1, &sqrt_e_t1, &inv_sqrt_e_t1); temp_O->SymAddMat2(1.0, *W_t1, kNoTrans, 0.0); - // O_t = E_t^{-0.5} W_t W_t^T E_t^{-0.5} + // O_{t+1} = E_{t+1}^{-0.5} W_{t+1} W_{t+1}^T E_{t+1}^{-0.5} Matrix O_mat(*temp_O); SpMatrix O(O_mat, kTakeLower); for (int32 i = 0; i < R; i++) { @@ -439,7 +439,7 @@ void OnlineNaturalGradient::PreconditionDirectionsInternal( if (self_debug_) { KALDI_WARN << "Reorthogonalizing."; } - ReorthogonalizeXt1(d_t1, + ReorthogonalizeRt1(d_t1, rho_t1, &W_t1, &J_t, @@ -510,7 +510,7 @@ void OnlineNaturalGradient::ComputeWt1(int32 N, // B_t = J_t + (1-\eta)/(\eta/N) (D_t + \rho_t I) W_t J_t->AddDiagVecMat(1.0, w_t_coeff_gpu, W_t, kNoTrans, 1.0); - // A_t = (\eta/N) E_{t+1}^{0.5} C_t^{-0.5} U_t^T E_t^{-0.5} B_t + // A_t = (\eta/N) E_{t+1}^{0.5} C_t^{-0.5} U_t^T E_t^{-0.5} Matrix A_t(U_t, kTrans); for (int32 i = 0; i < R; i++) { BaseFloat i_factor = (eta / N) * sqrt_e_t1(i) * inv_sqrt_c_t(i); diff --git a/src/nnet3/natural-gradient-online.h b/src/nnet3/natural-gradient-online.h index a68ad9bbb53..77be28a19d4 100644 --- a/src/nnet3/natural-gradient-online.h +++ b/src/nnet3/natural-gradient-online.h @@ -375,8 +375,8 @@ namespace nnet3 { * Initialization * Now, a note on what we do on time t = 0, i.e. for the first minibatch. We - initialize X_0 to the top R eigenvectors of 1/N X_0 X_0^T, where N is the - minibatch size (num-rows of R0). If L is the corresponding RxR diagonal + initialize R_0 to the top R eigenvectors of 1/N X_0 X_0^T, where N is the + minibatch size (num-rows of X0). If L is the corresponding RxR diagonal matrix of eigenvalues, then we will set D_0 = L - \rho_0 I. We set \rho_0 to ensure that tr(F_0) = 1/N tr(X_0 X_0^T), @@ -457,7 +457,7 @@ class OnlineNaturalGradient { not. 
*/ - void PreconditionDirections(CuMatrixBase *R, + void PreconditionDirections(CuMatrixBase *X, BaseFloat *scale); @@ -515,7 +515,7 @@ class OnlineNaturalGradient { // This function is called if C_t has high condition number; it makes sure // that R_{t+1} is orthogonal. See the section in the extended comment above // on "keeping R_t orthogonal". - void ReorthogonalizeXt1(const VectorBase &d_t1, + void ReorthogonalizeRt1(const VectorBase &d_t1, BaseFloat rho_t1, CuMatrixBase *W_t1, CuMatrixBase *temp_W, From b1b230c8b1facd1695dfb7797892753426ba9dae Mon Sep 17 00:00:00 2001 From: Karel Vesely Date: Thu, 14 Mar 2019 17:22:24 +0100 Subject: [PATCH 080/235] [src,scripts] Python2 compatibility fixes and code cleanup for nnet1 (#3113) --- egs/wsj/s5/steps/nnet/train.sh | 12 - egs/wsj/s5/utils/nnet/gen_dct_mat.py | 11 +- egs/wsj/s5/utils/nnet/gen_hamm_mat.py | 5 +- egs/wsj/s5/utils/nnet/gen_splice.py | 5 +- egs/wsj/s5/utils/nnet/make_cnn2d_proto.py | 259 ---------- src/nnet/nnet-average-pooling-2d-component.h | 209 -------- src/nnet/nnet-component-test.cc | 151 ------ src/nnet/nnet-component.cc | 16 - src/nnet/nnet-component.h | 3 - src/nnet/nnet-convolutional-2d-component.h | 495 ------------------- src/nnet/nnet-max-pooling-2d-component.h | 225 --------- 11 files changed, 6 insertions(+), 1385 deletions(-) delete mode 100755 egs/wsj/s5/utils/nnet/make_cnn2d_proto.py delete mode 100644 src/nnet/nnet-average-pooling-2d-component.h delete mode 100644 src/nnet/nnet-convolutional-2d-component.h delete mode 100644 src/nnet/nnet-max-pooling-2d-component.h diff --git a/egs/wsj/s5/steps/nnet/train.sh b/egs/wsj/s5/steps/nnet/train.sh index c23a15362c7..50a62837b67 100755 --- a/egs/wsj/s5/steps/nnet/train.sh +++ b/egs/wsj/s5/steps/nnet/train.sh @@ -433,18 +433,6 @@ else ${bn_dim:+ --bottleneck-dim=$bn_dim} \ "$cnn_fea" $num_tgt $hid_layers $hid_dim >>$nnet_proto ;; - cnn2d) - delta_order=$([ -z $delta_opts ] && echo "0" || { echo $delta_opts | tr ' ' '\n' | grep "delta[-_]order" | sed 's:^.*=::'; }) - echo "Debug : $delta_opts, delta_order $delta_order" - utils/nnet/make_cnn2d_proto.py $cnn_proto_opts \ - --splice=$splice --delta-order=$delta_order --dir=$dir \ - $num_fea >$nnet_proto - cnn_fea=$(cat $nnet_proto | grep -v '^$' | tail -n1 | awk '{ print $5; }') - utils/nnet/make_nnet_proto.py $proto_opts \ - --no-smaller-input-weights \ - ${bn_dim:+ --bottleneck-dim=$bn_dim} \ - "$cnn_fea" $num_tgt $hid_layers $hid_dim >>$nnet_proto - ;; lstm) utils/nnet/make_lstm_proto.py $proto_opts \ $num_fea $num_tgt >$nnet_proto diff --git a/egs/wsj/s5/utils/nnet/gen_dct_mat.py b/egs/wsj/s5/utils/nnet/gen_dct_mat.py index 24139f1c9f8..77461112d0b 100755 --- a/egs/wsj/s5/utils/nnet/gen_dct_mat.py +++ b/egs/wsj/s5/utils/nnet/gen_dct_mat.py @@ -16,8 +16,8 @@ # limitations under the License. # ./gen_dct_mat.py -# script generates matrix with DCT transform, which is sparse -# and takes into account that data-layout is along frequency axis, +# script generates matrix with DCT transform, which is sparse +# and takes into account that data-layout is along frequency axis, # while DCT is done along temporal axis. 
from __future__ import division @@ -29,10 +29,7 @@ from optparse import OptionParser def print_on_same_line(text): - if (sys.version_info > (3,0)): - print(text, end=' ') - else: - print text, + print(text, end=' ') parser = OptionParser() parser.add_option('--fea-dim', dest='dim', help='feature dimension') @@ -69,7 +66,7 @@ def print_on_same_line(text): if(n==timeContext-1): print_on_same_line((dim-m-1)*'0 ') print() - print() + print() print(']') diff --git a/egs/wsj/s5/utils/nnet/gen_hamm_mat.py b/egs/wsj/s5/utils/nnet/gen_hamm_mat.py index d7e9d9b7493..110178c6702 100755 --- a/egs/wsj/s5/utils/nnet/gen_hamm_mat.py +++ b/egs/wsj/s5/utils/nnet/gen_hamm_mat.py @@ -27,10 +27,7 @@ from optparse import OptionParser def print_on_same_line(text): - if (sys.version_info > (3,0)): - print(text, end=' ') - else: - print text, + print(text, end=' ') parser = OptionParser() parser.add_option('--fea-dim', dest='dim', help='feature dimension') diff --git a/egs/wsj/s5/utils/nnet/gen_splice.py b/egs/wsj/s5/utils/nnet/gen_splice.py index 3fe76513df6..f3a2c8b39ac 100755 --- a/egs/wsj/s5/utils/nnet/gen_splice.py +++ b/egs/wsj/s5/utils/nnet/gen_splice.py @@ -26,10 +26,7 @@ from optparse import OptionParser def print_on_same_line(text): - if (sys.version_info > (3,0)): - print(text, end=' ') - else: - print text, + print(text, end=' ') parser = OptionParser() parser.add_option('--fea-dim', dest='dim_in', help='feature dimension') diff --git a/egs/wsj/s5/utils/nnet/make_cnn2d_proto.py b/egs/wsj/s5/utils/nnet/make_cnn2d_proto.py deleted file mode 100755 index 172660da825..00000000000 --- a/egs/wsj/s5/utils/nnet/make_cnn2d_proto.py +++ /dev/null @@ -1,259 +0,0 @@ -#!/usr/bin/python - -# Copyright 2014 Brno University of Technology (author: Karel Vesely) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# Generated Nnet prototype, to be initialized by 'nnet-initialize'. 
- -from __future__ import division -from __future__ import print_function -import math, random, sys, warnings -from optparse import OptionParser - -### -### Parse options -### -usage="%prog [options] >nnet-proto-file" -parser = OptionParser(usage) - -parser.add_option('--activation-type', dest='activation_type', - help='Select type of activation function : (|) [default: %default]', - default='', type='string'); - -parser.add_option('--cnn1-num-filters', dest='cnn1_num_filters', - help='Number of filters in first convolutional layer [default: %default]', - default=128, type='int') -# this is given by splice -# parser.add_option('--cnn1-fmap-x-len', dest='cnn1_fmap_x_len', -# help='Size of cnn1-fmap-x-len [default: %default]', -# default=11, type='int') - -# this should be equal to feat_raw_dim -# parser.add_option('--cnn1-fmap-y-len', dest='cnn1_fmap_y_len', -# help='Size of cnn1-fmap-y-len [default: %default]', -# default=32, type='int') - -parser.add_option('--cnn1-filt-x-len', dest='cnn1_filt_x_len', - help='Size of cnn1-filt-x-len [default: %default]', - default=9, type='int') -parser.add_option('--cnn1-filt-y-len', dest='cnn1_filt_y_len', - help='Size of cnn1-filt-y-len [default: %default]', - default=9, type='int') - -parser.add_option('--cnn1-filt-x-step', dest='cnn1_filt_x_step', - help='Size of cnn1-filt-x-step [default: %default]', - default=1, type='int') -parser.add_option('--cnn1-filt-y-step', dest='cnn1_filt_y_step', - help='Size of cnn1-filt-y-step [default: %default]', - default=1, type='int') -parser.add_option('--cnn1-connect-fmap', dest='cnn1_connect_fmap', - help='Size of cnn1-connect-fmap [default: %default]', - default=0, type='int') - -parser.add_option('--pool1-x-len', dest='pool1_x_len', - help='Size of pool1-filt-x-len [default: %default]', - default=1, type='int') -parser.add_option('--pool1-x-step', dest='pool1_x_step', - help='Size of pool1-x-step [default: %default]', - default=1, type='int') - - -# -parser.add_option('--pool1-y-len', dest='pool1_y_len', - help='Size of pool1-y-len [default: %default]', - default=3, type='int') -parser.add_option('--pool1-y-step', dest='pool1_y_step', - help='Size of pool1-y-step [default: %default]', - default=3, type='int') - -parser.add_option('--pool1-type', dest='pool1_type', - help='Type of pooling (Max || Average) [default: %default]', - default='Max', type='string') - -parser.add_option('--cnn2-num-filters', dest='cnn2_num_filters', - help='Number of filters in first convolutional layer [default: %default]', - default=256, type='int') -parser.add_option('--cnn2-filt-x-len', dest='cnn2_filt_x_len', - help='Size of cnn2-filt-x-len [default: %default]', - default=3, type='int') -parser.add_option('--cnn2-filt-y-len', dest='cnn2_filt_y_len', - help='Size of cnn2-filt-y-len [default: %default]', - default=4, type='int') -parser.add_option('--cnn2-filt-x-step', dest='cnn2_filt_x_step', - help='Size of cnn2-filt-x-step [default: %default]', - default=1, type='int') -parser.add_option('--cnn2-filt-y-step', dest='cnn2_filt_y_step', - help='Size of cnn2-filt-y-step [default: %default]', - default=1, type='int') -parser.add_option('--cnn2-connect-fmap', dest='cnn2_connect_fmap', - help='Size of cnn2-connect-fmap [default: %default]', - default=1, type='int') - -parser.add_option('--pitch-dim', dest='pitch_dim', - help='Number of features representing pitch [default: %default]', - default=0, type='int') -parser.add_option('--delta-order', dest='delta_order', - help='Order of delta features [default: %default]', - default=2, 
type='int') -parser.add_option('--splice', dest='splice', - help='Length of splice [default: %default]', - default=5,type='int') -parser.add_option('--dir', dest='dirct', - help='Directory, where network prototypes will be saved [default: %default]', - default='.', type='string') -parser.add_option('--num-pitch-neurons', dest='num_pitch_neurons', - help='Number of neurons in layers processing pitch features [default: %default]', - default='200', type='int') - - -(o,args) = parser.parse_args() -if len(args) != 1 : - parser.print_help() - sys.exit(1) - -feat_dim=int(args[0]) -### End parse options - -feat_raw_dim = feat_dim / (o.delta_order+1) / (o.splice*2+1) - o.pitch_dim # we need number of feats without deltas and splice and pitch -o.cnn1_fmap_y_len = feat_raw_dim -o.cnn1_fmap_x_len = o.splice*2+1 - -# Check -assert(feat_dim > 0) -assert(o.pool1_type == 'Max' or o.pool1_type == 'Average') - -## Extra checks if dimensions are matching, if not match them by -## producing a warning -# cnn1 -assert( (o.cnn1_fmap_y_len - o.cnn1_filt_y_len) % o.cnn1_filt_y_step == 0 ) -assert( (o.cnn1_fmap_x_len - o.cnn1_filt_x_len) % o.cnn1_filt_x_step == 0 ) - -# subsample1 -cnn1_out_fmap_y_len=(1 + (o.cnn1_fmap_y_len - o.cnn1_filt_y_len) / o.cnn1_filt_y_step) -cnn1_out_fmap_x_len=(1 + (o.cnn1_fmap_x_len - o.cnn1_filt_x_len) / o.cnn1_filt_x_step) - -# fix filt_len and filt_step -def fix_filt_step(inp_len, filt_len, filt_step): - - if ((inp_len - filt_len) % filt_step == 0): - return filt_step - else: - # filt_step <= filt_len - for filt_step in range(filt_len, 0, -1): - if ((inp_len - filt_len) % filt_step == 0): - return filt_step - -o.pool1_y_step = fix_filt_step(cnn1_out_fmap_y_len, o.pool1_y_len, o.pool1_y_step) -if o.pool1_y_step == 1 and o.pool1_y_len != 1: - warnings.warn('WARNING: Choose different pool1_y_len as subsampling is not happening'); - -o.pool1_x_step = fix_filt_step(cnn1_out_fmap_x_len, o.pool1_x_len, o.pool1_x_step) -if o.pool1_x_step == 1 and o.pool1_x_len != 1: - warnings.warn('WARNING: Choose different pool1_x_len as subsampling is not happening'); - - -### -### Print prototype of the network -### - -# Begin the prototype -print("") - -# Convolutional part of network -'''1st CNN layer''' -cnn1_input_dim=feat_raw_dim * (o.delta_order+1) * (o.splice*2+1) -cnn1_out_fmap_x_len=(1 + (o.cnn1_fmap_x_len - o.cnn1_filt_x_len) / o.cnn1_filt_x_step) -cnn1_out_fmap_y_len=(1 + (o.cnn1_fmap_y_len - o.cnn1_filt_y_len) / o.cnn1_filt_y_step) -cnn1_output_dim=o.cnn1_num_filters * cnn1_out_fmap_x_len * cnn1_out_fmap_y_len - -'''1st Pooling layer''' -pool1_input_dim=cnn1_output_dim -pool1_fmap_x_len=cnn1_out_fmap_x_len -pool1_out_fmap_x_len=(1 + (pool1_fmap_x_len - o.pool1_x_len) / o.pool1_x_step) -pool1_fmap_y_len=cnn1_out_fmap_y_len -pool1_out_fmap_y_len=(1 + (pool1_fmap_y_len - o.pool1_y_len) / o.pool1_y_step) -pool1_output_dim=o.cnn1_num_filters*pool1_out_fmap_x_len*pool1_out_fmap_y_len - -'''2nd CNN layer''' -cnn2_input_dim=pool1_output_dim -cnn2_fmap_x_len=pool1_out_fmap_x_len -cnn2_out_fmap_x_len=(1 + (cnn2_fmap_x_len - o.cnn2_filt_x_len) / o.cnn2_filt_x_step) -cnn2_fmap_y_len=pool1_out_fmap_y_len -cnn2_out_fmap_y_len=(1 + (cnn2_fmap_y_len - o.cnn2_filt_y_len) / o.cnn2_filt_y_step) -cnn2_output_dim=o.cnn2_num_filters * cnn2_out_fmap_x_len * cnn2_out_fmap_y_len - - -convolution_proto = '' - -convolution_proto += " %d %d %d %d %d %d %d %d %d %f %f %f\n" % \ - ( cnn1_input_dim, cnn1_output_dim, o.cnn1_fmap_x_len, o.cnn1_fmap_y_len, o.cnn1_filt_x_len, o.cnn1_filt_y_len, o.cnn1_filt_x_step, 
o.cnn1_filt_y_step, o.cnn1_connect_fmap, 0.0, 0.0, 0.01 ) -convolution_proto += "<%sPooling2DComponent> %d %d %d %d %d %d %d %d\n" % \ - ( o.pool1_type, pool1_input_dim, pool1_output_dim, pool1_fmap_x_len, pool1_fmap_y_len, o.pool1_x_len, o.pool1_y_len, o.pool1_x_step, o.pool1_y_step ) -convolution_proto += " %d %d %f\n" % \ - ( pool1_output_dim, pool1_output_dim, 1.0 ) -convolution_proto += " %d %d %f\n" % \ - ( pool1_output_dim, pool1_output_dim, 0.0 ) -convolution_proto += "%s %d %d\n" % \ - ( o.activation_type, pool1_output_dim, pool1_output_dim ) -convolution_proto += " %d %d %d %d %d %d %d %d %d %f %f %f\n" % \ - ( cnn2_input_dim, cnn2_output_dim, cnn2_fmap_x_len, cnn2_fmap_y_len, o.cnn2_filt_x_len, o.cnn2_filt_y_len, o.cnn2_filt_x_step, o.cnn2_filt_y_step, o.cnn2_connect_fmap, -2.0, 4.0, 0.1 ) -convolution_proto += " %d %d %f\n" % \ - ( cnn2_output_dim, cnn2_output_dim, 1.0) -convolution_proto += " %d %d %f\n" % \ - ( cnn2_output_dim, cnn2_output_dim, 0.0) -convolution_proto += "%s %d %d\n" % \ - ( o.activation_type, cnn2_output_dim, cnn2_output_dim) - -if (o.pitch_dim > 0): - # convolutional part - f_conv = open('%s/nnet.proto.convolution' % o.dirct, 'w') - f_conv.write('\n') - f_conv.write(convolution_proto) - f_conv.write('\n') - f_conv.close() - - # pitch part - f_pitch = open('%s/nnet.proto.pitch' % o.dirct, 'w') - f_pitch.write('\n') - f_pitch.write(' %d %d %f %f %f\n' % \ - ((o.pitch_dim * (o.delta_order+1) * (o.splice*2+1)), o.num_pitch_neurons, -2.0, 4.0, 0.109375)) - f_pitch.write('%s %d %d\n' % \ - (o.activation_type, o.num_pitch_neurons, o.num_pitch_neurons)) - f_pitch.write(' %d %d %f %f %f\n' % \ - (o.num_pitch_neurons, o.num_pitch_neurons, -2.0, 4.0, 0.109375)) - f_pitch.write('%s %d %d\n' % \ - (o.activation_type, o.num_pitch_neurons, o.num_pitch_neurons)) - f_pitch.write('\n') - f_pitch.close() - - # paralell part - vector = '' - for i in range(1, (feat_raw_dim + o.pitch_dim) * (o.delta_order+1) * (o.splice*2+1), feat_raw_dim + o.pitch_dim): - vector += '%d:1:%d ' % (i, i + feat_raw_dim - 1) - for i in range(feat_raw_dim+1, (feat_raw_dim + o.pitch_dim) * (o.delta_order+1) * (o.splice*2+1), feat_raw_dim + o.pitch_dim): - vector += '%d:1:%d ' % (i, i + o.pitch_dim - 1) - print(' %d %d %s ' % \ - ((feat_raw_dim + o.pitch_dim) * (o.delta_order+1) * (o.splice*2+1), (feat_raw_dim + o.pitch_dim) * (o.delta_order+1) * (o.splice*2+1), vector)) - print(' %d %d %s %s ' % \ - ((feat_raw_dim + o.pitch_dim) * (o.delta_order+1) * (o.splice*2+1), o.num_pitch_neurons + cnn2_output_dim, '%s/nnet.proto.convolution' % o.dirct, '%s/nnet.proto.pitch' % o.dirct)) - - num_convolution_output = o.num_pitch_neurons + cnn2_output_dim -else: # no pitch - print(convolution_proto) - -# We are done! -sys.exit(0) - - diff --git a/src/nnet/nnet-average-pooling-2d-component.h b/src/nnet/nnet-average-pooling-2d-component.h deleted file mode 100644 index 17ae87f94db..00000000000 --- a/src/nnet/nnet-average-pooling-2d-component.h +++ /dev/null @@ -1,209 +0,0 @@ -// nnet/nnet-average-pooling-2d-component.h - -// Copyright 2014 Brno University of Technology (author: Karel Vesely) -// Johns Hopkins University (author: Sri Harish Mallidi) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#ifndef KALDI_NNET_NNET_AVERAGE_POOLING_2D_COMPONENT_H_ -#define KALDI_NNET_NNET_AVERAGE_POOLING_2D_COMPONENT_H_ - -#include -#include - -#include "nnet/nnet-component.h" -#include "nnet/nnet-utils.h" -#include "cudamatrix/cu-math.h" - -namespace kaldi { -namespace nnet1 { - -/** - * AveragePoolingComponent : - * The input/output matrices are split to submatrices with width 'pool_stride_'. - * The pooling is done over 3rd axis, of the set of 2d matrices. - * Our pooling supports overlaps, overlaps occur when (pool_step_ < pool_size_). - */ -class AveragePooling2DComponent : public Component { - public: - AveragePooling2DComponent(int32 dim_in, int32 dim_out): - Component(dim_in, dim_out), - fmap_x_len_(0), fmap_y_len_(0), - pool_x_len_(0), pool_y_len_(0), - pool_x_step_(0), pool_y_step_(0) - { } - ~AveragePooling2DComponent() - { } - - Component* Copy() const { return new AveragePooling2DComponent(*this); } - ComponentType GetType() const { return kAveragePooling2DComponent; } - - void InitData(std::istream &is) { - // parse config - std::string token; - while (is >> std::ws, !is.eof()) { - ReadToken(is, false, &token); - /**/ if (token == "") ReadBasicType(is, false, &fmap_x_len_); - else if (token == "") ReadBasicType(is, false, &fmap_y_len_); - else if (token == "") ReadBasicType(is, false, &pool_x_len_); - else if (token == "") ReadBasicType(is, false, &pool_y_len_); - else if (token == "") ReadBasicType(is, false, &pool_x_step_); - else if (token == "") ReadBasicType(is, false, &pool_y_step_); - else KALDI_ERR << "Unknown token " << token << ", a typo in config?" 
- << " (FmapXLen|FmapYLen|PoolXLen|PoolYLen|PoolXStep|PoolYStep)"; - } - // check - KALDI_ASSERT(fmap_x_len_ * fmap_y_len_ != 0); - KALDI_ASSERT(pool_x_len_ * pool_y_len_ != 0); - KALDI_ASSERT(pool_x_step_ * pool_y_step_ != 0); - } - - void ReadData(std::istream &is, bool binary) { - // pooling hyperparameters - ExpectToken(is, binary, ""); - ReadBasicType(is, binary, &fmap_x_len_); - ExpectToken(is, binary, ""); - ReadBasicType(is, binary, &fmap_y_len_); - ExpectToken(is, binary, ""); - ReadBasicType(is, binary, &pool_x_len_); - ExpectToken(is, binary, ""); - ReadBasicType(is, binary, &pool_y_len_); - ExpectToken(is, binary, ""); - ReadBasicType(is, binary, &pool_x_step_); - ExpectToken(is, binary, ""); - ReadBasicType(is, binary, &pool_y_step_); - - // - // Sanity checks: - // - // input sanity checks - // input_dim_ should be multiple of (fmap_x_len_ * fmap_y_len_) - KALDI_ASSERT(input_dim_ % (fmap_x_len_ * fmap_y_len_) == 0); - int32 num_input_fmaps = input_dim_ / (fmap_x_len_ * fmap_y_len_); - KALDI_LOG << "num_fmaps " << num_input_fmaps; - // check if step is in sync with fmap_len and filt_len - KALDI_ASSERT((fmap_x_len_ - pool_x_len_) % (pool_x_step_) == 0); - KALDI_ASSERT((fmap_y_len_ - pool_y_len_) % (pool_y_step_) == 0); - int32 out_fmap_x_len = (fmap_x_len_ - pool_x_len_)/pool_x_step_ + 1; - int32 out_fmap_y_len = (fmap_y_len_ - pool_y_len_)/pool_y_step_ + 1; - // int32 out_fmap_size = out_fmap_x_len*out_fmap_y_len; - // output sanity checks - KALDI_ASSERT(output_dim_ % (out_fmap_x_len * out_fmap_y_len) == 0); - int32 num_output_fmaps = output_dim_ / (out_fmap_x_len * out_fmap_y_len); - KALDI_ASSERT(num_input_fmaps == num_output_fmaps); - } - - void WriteData(std::ostream &os, bool binary) const { - // pooling hyperparameters - WriteToken(os, binary, ""); - WriteBasicType(os, binary, fmap_x_len_); - WriteToken(os, binary, ""); - WriteBasicType(os, binary, fmap_y_len_); - WriteToken(os, binary, ""); - WriteBasicType(os, binary, pool_x_len_); - WriteToken(os, binary, ""); - WriteBasicType(os, binary, pool_y_len_); - WriteToken(os, binary, ""); - WriteBasicType(os, binary, pool_x_step_); - WriteToken(os, binary, ""); - WriteBasicType(os, binary, pool_y_step_); - } - - void PropagateFnc(const CuMatrixBase &in, - CuMatrixBase *out) { - // useful dims - int32 num_input_fmaps = input_dim_ / (fmap_x_len_ * fmap_y_len_); - int out_fmap_cnt = 0; - for (int32 m = 0; m < fmap_x_len_-pool_x_len_+1; m = m+pool_x_step_) { - for (int32 n = 0; n < fmap_y_len_-pool_y_len_+1; n = n+pool_y_step_) { - int32 st = 0; - st = (m * fmap_y_len_ + n) * num_input_fmaps; - CuSubMatrix pool(out->ColRange(out_fmap_cnt * num_input_fmaps, num_input_fmaps)); - pool.SetZero(); // reset - for (int32 i = 0; i < pool_x_len_; i++) { - for (int32 j = 0; j < pool_y_len_; j++) { - int32 c = 0; - c = st + i * (num_input_fmaps * fmap_y_len_) - + j * num_input_fmaps; - pool.AddMat(1.0, in.ColRange(c, num_input_fmaps)); - } - } - pool.Scale(1.0 / (pool_x_len_ * pool_y_len_)); - out_fmap_cnt++; - } - } - } - - void BackpropagateFnc(const CuMatrixBase &in, - const CuMatrixBase &out, - const CuMatrixBase &out_diff, - CuMatrixBase *in_diff) { - // useful dims - int32 num_input_fmaps = input_dim_ / (fmap_x_len_ * fmap_y_len_); - int32 inp_fmap_size = fmap_x_len_ * fmap_y_len_; - // - // here we note how many diff matrices are summed for each input patch, - std::vector patch_summands(inp_fmap_size, 0); - // this metainfo will be used to divide diff of patches - // used in more than one pool. 
- // - - in_diff->SetZero(); // reset - int out_fmap_cnt = 0; - for (int32 m = 0; m < fmap_x_len_-pool_x_len_+1; m = m+pool_x_step_) { - for (int32 n = 0; n < fmap_y_len_-pool_y_len_+1; n = n+pool_y_step_) { - int32 st = 0; - st = (m * fmap_y_len_ + n) * num_input_fmaps; - CuSubMatrix src(out_diff.ColRange(out_fmap_cnt * num_input_fmaps, num_input_fmaps)); - for (int32 i = 0; i < pool_x_len_; i++) { - for (int32 j = 0; j < pool_y_len_; j++) { - int32 c = 0; - c = st + i * (num_input_fmaps * fmap_y_len_) - + j * num_input_fmaps; - CuSubMatrix tgt(in_diff->ColRange(c, num_input_fmaps)); - tgt.AddMat(1.0, src); - patch_summands[c / num_input_fmaps] += 1; - } - } - out_fmap_cnt++; - } - } - - // divide diff by average-pooling-dim (derivative of averaging) - in_diff->Scale(1.0 / (pool_x_len_ * pool_y_len_)); - - // divide diff by #summands (compensate for patches used in more pools) - for (int i = 0; i < fmap_x_len_; i++) { - for (int32 j = 0; j < fmap_y_len_; j++) { - int32 c = i * fmap_y_len_ + j; - CuSubMatrix tgt(in_diff->ColRange(c*num_input_fmaps, num_input_fmaps)); - KALDI_ASSERT(patch_summands[c] > 0); // patch at least in one pool - tgt.Scale(1.0 / patch_summands[c]); - } - } - } - - private: - int32 fmap_x_len_, fmap_y_len_, - pool_x_len_, pool_y_len_, - pool_x_step_, pool_y_step_; -}; - -} // namespace nnet1 -} // namespace kaldi - -#endif // KALDI_NNET_NNET_AVERAGE_POOLING_2D_COMPONENT_H_ diff --git a/src/nnet/nnet-component-test.cc b/src/nnet/nnet-component-test.cc index da181bd18f6..0786eb51c15 100644 --- a/src/nnet/nnet-component-test.cc +++ b/src/nnet/nnet-component-test.cc @@ -24,10 +24,7 @@ #include "nnet/nnet-component.h" #include "nnet/nnet-nnet.h" #include "nnet/nnet-convolutional-component.h" -#include "nnet/nnet-convolutional-2d-component.h" #include "nnet/nnet-max-pooling-component.h" -#include "nnet/nnet-max-pooling-2d-component.h" -#include "nnet/nnet-average-pooling-2d-component.h" #include "util/common-utils.h" namespace kaldi { @@ -245,151 +242,6 @@ namespace nnet1 { delete c; } - void UnitTestMaxPooling2DComponent() { /* Implemented by Harish Mallidi */ - // make max-pooling2d component - Component* c = Component::Init( - " 56 18 \ - 4 7 2 3 \ - 1 2" - ); - - // input matrix, - CuMatrix mat_in; - ReadCuMatrixFromString("[ 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 \ - 11 11 12 12 13 13 14 14 15 15 16 16 17 17 18 18 19 19 20 20 21 21 \ - 22 22 23 23 24 24 25 25 26 26 27 27 ]", &mat_in); - - // expected output (max values in the patch) - CuMatrix mat_out_ref; - ReadCuMatrixFromString("[ 9 9 11 11 13 13 16 16 18 18 \ - 20 20 23 23 25 25 27 27 ]", &mat_out_ref); - - // propagate, - CuMatrix mat_out; - c->Propagate(mat_in, &mat_out); - KALDI_LOG << "mat_out" << mat_out << "mat_out_ref" << mat_out_ref; - AssertEqual(mat_out, mat_out_ref); - - - // locations of max values will be shown - CuMatrix mat_out_diff(mat_out); - ReadCuMatrixFromString( - "[ 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 ]", &mat_out_diff - ); - - // expected backpropagated values, - CuMatrix mat_in_diff_ref; // hand-computed back-propagated values, - ReadCuMatrixFromString("[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ - 0.25 0.25 0 0 1 1 0 0 0 0 0.75 0.75 0 0 1 1 0 0 2.5 2.5 \ - 0 0 0 0 3 3 0 0 3.5 3.5 0 0 8 8 ]", &mat_in_diff_ref - ); - - // backpropagate, - CuMatrix mat_in_diff; - c->Backpropagate(mat_in, mat_out, mat_out_diff, &mat_in_diff); - KALDI_LOG << "mat_in_diff " << mat_in_diff - << " mat_in_diff_ref " << mat_in_diff_ref; - AssertEqual(mat_in_diff, mat_in_diff_ref); - - delete c; - } - - 
void UnitTestAveragePooling2DComponent() { /* Implemented by Harish Mallidi */ - // make average-pooling2d component - Component* c = Component::Init( - " 56 18 \ - 4 7 2 3 \ - 1 2" - ); - - // input matrix, - CuMatrix mat_in; - ReadCuMatrixFromString("[ 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 \ - 11 11 12 12 13 13 14 14 15 15 16 16 17 17 18 18 19 19 20 20 \ - 21 21 22 22 23 23 24 24 25 25 26 26 27 27 ]", &mat_in); - - // expected output (max values in the patch) - CuMatrix mat_out_ref; - ReadCuMatrixFromString("[ 4.5 4.5 6.5 6.5 8.5 8.5 11.5 11.5 13.5 13.5 \ - 15.5 15.5 18.5 18.5 20.5 20.5 22.5 22.5 ]", &mat_out_ref); - - // propagate, - CuMatrix mat_out; - c->Propagate(mat_in, &mat_out); - KALDI_LOG << "mat_out" << mat_out << "mat_out_ref" << mat_out_ref; - AssertEqual(mat_out, mat_out_ref); - - - // locations of max values will be shown - CuMatrix mat_out_diff(mat_out); - ReadCuMatrixFromString("[ 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 ]", &mat_out_diff); - - // expected backpropagated values, - CuMatrix mat_in_diff_ref; // hand-computed back-propagated values, - ReadCuMatrixFromString("[ 0 0 0 0 0.0833333 0.0833333 0.166667 0.166667 \ - 0.25 0.25 0.333333 0.333333 0.333333 0.333333 0.25 0.25 0.25 0.25 \ - 0.333333 0.333333 0.416667 0.416667 0.5 0.5 0.583333 0.583333 0.583333 \ - 0.583333 0.75 0.75 0.75 0.75 0.833333 0.833333 0.916667 0.916667 1 1 \ - 1.08333 1.08333 1.08333 1.08333 1 1 1 1 1.08333 1.08333 1.16667 1.16667 \ - 1.25 1.25 1.33333 1.33333 1.33333 1.33333 ]", &mat_in_diff_ref - ); - - // backpropagate, - CuMatrix mat_in_diff; - c->Backpropagate(mat_in, mat_out, mat_out_diff, &mat_in_diff); - KALDI_LOG << "mat_in_diff " << mat_in_diff - << " mat_in_diff_ref " << mat_in_diff_ref; - AssertEqual(mat_in_diff, mat_in_diff_ref); - - delete c; - } - - - void UnitTestConvolutional2DComponent() { /* Implemented by Harish Mallidi */ - // Convolutional2D component - Component* c = ReadComponentFromString(" 18 56 \ - 0 0 4 7 \ - 2 3 1 2 1 \ - [ 0 0 1 1 2 2 3 3 4 4 5 5 ; 0 0 1 1 2 2 3 3 4 4 5 5 ] \ - [ 0 0 ]" - ); - - // input matrix - CuMatrix mat_in; - ReadCuMatrixFromString("[ 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 \ - 11 11 12 12 13 13 14 14 15 15 16 16 17 17 18 18 19 19 20 20 \ - 21 21 22 22 23 23 24 24 25 25 26 26 27 27 ]", &mat_in); - - CuMatrix mat_out_ref; - ReadCuMatrixFromString("[ 206 206 266 266 326 326 416 416 476 476 536 536 \ - 626 626 686 686 746 746 ]", &mat_out_ref); - - // propagate - CuMatrix mat_out; - c->Propagate(mat_in, &mat_out); - KALDI_LOG << "mat_out" << mat_out << "mat_out" << mat_out_ref; - AssertEqual(mat_out, mat_out_ref); - - // prepare mat_out_diff, mat_in_diff_ref, - CuMatrix mat_out_diff; - ReadCuMatrixFromString("[ 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 ]", - &mat_out_diff); - - CuMatrix mat_in_diff_ref; - ReadCuMatrixFromString("[ 0 0 0 0 0 0 2 2 2 2 4 4 8 8 0 0 3 3 4.5 4.5 8 8 \ - 9.5 9.5 13 13 20 20 9 9 18 18 19.5 19.5 23 23 24.5 24.5 28 28 41 41 \ - 36 36 48 48 51 51 56 56 59 59 64 64 80 80 ]", &mat_in_diff_ref); - - // backpropagate - CuMatrix mat_in_diff; - c->Backpropagate(mat_in, mat_out, mat_out_diff, &mat_in_diff); - KALDI_LOG << "mat_in_diff " << mat_in_diff - << " mat_in_diff_ref " << mat_in_diff_ref; - AssertEqual(mat_in_diff, mat_in_diff_ref); - - delete c; - } - void UnitTestDropoutComponent() { Component* c = ReadComponentFromString(" 100 100 0.7"); // buffers, @@ -434,9 +286,6 @@ int main() { UnitTestConvolutionalComponentUnity(); UnitTestConvolutionalComponent3x3(); UnitTestMaxPoolingComponent(); - 
UnitTestConvolutional2DComponent(); - UnitTestMaxPooling2DComponent(); - UnitTestAveragePooling2DComponent(); UnitTestDropoutComponent(); // end of unit-tests, if (loop == 0) diff --git a/src/nnet/nnet-component.cc b/src/nnet/nnet-component.cc index 34f988972a0..cf7741e6e57 100644 --- a/src/nnet/nnet-component.cc +++ b/src/nnet/nnet-component.cc @@ -35,10 +35,6 @@ #include "nnet/nnet-average-pooling-component.h" #include "nnet/nnet-max-pooling-component.h" -#include "nnet/nnet-convolutional-2d-component.h" -#include "nnet/nnet-average-pooling-2d-component.h" -#include "nnet/nnet-max-pooling-2d-component.h" - #include "nnet/nnet-lstm-projected.h" #include "nnet/nnet-blstm-projected.h" #include "nnet/nnet-recurrent.h" @@ -56,7 +52,6 @@ const struct Component::key_value Component::kMarkerMap[] = { { Component::kAffineTransform, "" }, { Component::kLinearTransform, "" }, { Component::kConvolutionalComponent, "" }, - { Component::kConvolutional2DComponent, "" }, { Component::kLstmProjected, "" }, { Component::kLstmProjected, "" }, // bwd compat. { Component::kBlstmProjected, "" }, @@ -77,9 +72,7 @@ const struct Component::key_value Component::kMarkerMap[] = { { Component::kRescale, "" }, { Component::kKlHmm, "" }, { Component::kAveragePoolingComponent, "" }, - { Component::kAveragePooling2DComponent, "" }, { Component::kMaxPoolingComponent, "" }, - { Component::kMaxPooling2DComponent, "" }, { Component::kSentenceAveragingComponent, "" }, { Component::kSimpleSentenceAveragingComponent, "" }, { Component::kFramePoolingComponent, "" }, @@ -127,9 +120,6 @@ Component* Component::NewComponentOfType(ComponentType comp_type, case Component::kConvolutionalComponent : ans = new ConvolutionalComponent(input_dim, output_dim); break; - case Component::kConvolutional2DComponent : - ans = new Convolutional2DComponent(input_dim, output_dim); - break; case Component::kLstmProjected : ans = new LstmProjected(input_dim, output_dim); break; @@ -190,15 +180,9 @@ Component* Component::NewComponentOfType(ComponentType comp_type, case Component::kAveragePoolingComponent : ans = new AveragePoolingComponent(input_dim, output_dim); break; - case Component::kAveragePooling2DComponent : - ans = new AveragePooling2DComponent(input_dim, output_dim); - break; case Component::kMaxPoolingComponent : ans = new MaxPoolingComponent(input_dim, output_dim); break; - case Component::kMaxPooling2DComponent : - ans = new MaxPooling2DComponent(input_dim, output_dim); - break; case Component::kFramePoolingComponent : ans = new FramePoolingComponent(input_dim, output_dim); break; diff --git a/src/nnet/nnet-component.h b/src/nnet/nnet-component.h index 2ef56622ca8..0cca2608b21 100644 --- a/src/nnet/nnet-component.h +++ b/src/nnet/nnet-component.h @@ -51,7 +51,6 @@ class Component { kAffineTransform, kLinearTransform, kConvolutionalComponent, - kConvolutional2DComponent, kLstmProjected, kBlstmProjected, kRecurrentComponent, @@ -79,9 +78,7 @@ class Component { kSentenceAveragingComponent, /* deprecated */ kSimpleSentenceAveragingComponent, kAveragePoolingComponent, - kAveragePooling2DComponent, kMaxPoolingComponent, - kMaxPooling2DComponent, kFramePoolingComponent, kParallelComponent, kMultiBasisComponent diff --git a/src/nnet/nnet-convolutional-2d-component.h b/src/nnet/nnet-convolutional-2d-component.h deleted file mode 100644 index 135ce894541..00000000000 --- a/src/nnet/nnet-convolutional-2d-component.h +++ /dev/null @@ -1,495 +0,0 @@ -// nnet/nnet-convolutional-2d-component.h - -// Copyright 2014-2015 Johns Hopkins University (author: 
Sri Harish Mallidi) -// Brno University of Technology (author: Karel Vesely), -// - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#ifndef KALDI_NNET_NNET_CONVOLUTIONAL_2D_COMPONENT_H_ -#define KALDI_NNET_NNET_CONVOLUTIONAL_2D_COMPONENT_H_ - -#include -#include - -#include "nnet/nnet-component.h" -#include "nnet/nnet-various.h" -#include "cudamatrix/cu-math.h" - -namespace kaldi { -namespace nnet1 { - -/** - * Convolutional2DComponent implements convolution over 2-axis (frequency and temporal) - * (i.e. frequency axis in case we are the 1st component in NN). - * // We don't do convolution along temporal axis, which simplifies the - * // implementation (and was not helpful for Tara). - * - * We assume the input featrues are spliced, i.e. each frame - * is in fact a set of stacked frames, where we can form patches - * which span over several frequency bands and time axes. - * - * The convolution is done over whole axis with same filters, - * i.e. we don't use separate filters for different 'regions' - * of frequency axis. - * - * In order to have a fast implementations, the filters - * are represented in vectorized form, where each rectangular - * filter corresponds to a row in a matrix, where all filters - * are stored. The features are then re-shaped to a set of matrices, - * where one matrix corresponds to single patch-position, - * where the filters get applied. - * - * The type of convolution is controled by hyperparameters: - * x_patch_dim_,y_patch_dim_ ... temporal and frequency axes sizes of the patch (e.g. (9,9) for 9x9 2D filter) - * x_patch_step_,y_patch_step_ ... temporal and frequencey sizes of shifts in the convolution (e.g. (1,1) 2D filter with 1 step shift in both axes) - * x_patch_stride_,y_patch_stride_ ... dimension of the feature (maps if inside convolutional layer) (e.g. (11,32) for 32-band 11 frame spliced spectrogram patch) - * The type of convolution is controlled by hyperparameters: - * fmap_x_len_, fmap_y_len_ ... dimension of the feature (maps if inside convolutional layer) (e.g. (11,32) for 32-band 11 frame spliced spectrogram patch) - * filt_x_len_, filt_y_len_ ... temporal and frequency sizes of the filters (e.g. (9,9) for 9x9 2D filter) - * filt_x_step_, filt_y_step_ ... temporal and frequency sizes of the filters (e.g. (1,1) for 2D-filter, with 1 step shift in both axes) - * - * - * Due to convolution same weights are used repeateadly, - * the final gradient is average of all position-specific - * gradients. 
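// A self-contained worked example of the dimension bookkeeping described in
// the comment above.  The 11x32 feature map, 9x9 filter and (1,1) step come
// from the examples mentioned there; the single input feature-map and the
// 128 filters are assumptions made purely for illustration, not values taken
// from the patch.
#include <cassert>
#include <iostream>

int main() {
  int fmap_x_len = 11, fmap_y_len = 32;  // spliced frames x frequency bands
  int filt_x_len = 9,  filt_y_len = 9;   // 9x9 filter
  int filt_x_step = 1, filt_y_step = 1;  // shift of 1 along both axes
  int num_input_fmaps = 1, num_filters = 128;

  // the step has to tile the feature map exactly (same checks as InitData()):
  assert((fmap_x_len - filt_x_len) % filt_x_step == 0);
  assert((fmap_y_len - filt_y_len) % filt_y_step == 0);

  int out_fmap_x_len = (fmap_x_len - filt_x_len) / filt_x_step + 1;  // = 3
  int out_fmap_y_len = (fmap_y_len - filt_y_len) / filt_y_step + 1;  // = 24
  int input_dim  = num_input_fmaps * fmap_x_len * fmap_y_len;        // = 352
  int output_dim = num_filters * out_fmap_x_len * out_fmap_y_len;    // = 9216

  // one row of the vectorized filter matrix per filter:
  int filter_cols = num_input_fmaps * filt_x_len * filt_y_len;       // = 81

  std::cout << "input_dim " << input_dim << ", output_dim " << output_dim
            << ", filter matrix " << num_filters << " x " << filter_cols
            << std::endl;
  return 0;
}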
- * - */ -class Convolutional2DComponent : public UpdatableComponent { - public: - Convolutional2DComponent(int32 dim_in, int32 dim_out): - UpdatableComponent(dim_in, dim_out), - fmap_x_len_(0), fmap_y_len_(0), - filt_x_len_(0), filt_y_len_(0), - filt_x_step_(0), filt_y_step_(0), - connect_fmap_(0) - { } - - ~Convolutional2DComponent() - { } - - Component* Copy() const { return new Convolutional2DComponent(*this); } - ComponentType GetType() const { return kConvolutional2DComponent; } - - void InitData(std::istream &is) { - // define options - BaseFloat bias_mean = -2.0, bias_range = 2.0, param_stddev = 0.1; - // parse config - std::string token; - while (is >> std::ws, !is.eof()) { - ReadToken(is, false, &token); - /**/ if (token == "") ReadBasicType(is, false, ¶m_stddev); - else if (token == "") ReadBasicType(is, false, &bias_mean); - else if (token == "") ReadBasicType(is, false, &bias_range); - else if (token == "") ReadBasicType(is, false, &fmap_x_len_); - else if (token == "") ReadBasicType(is, false, &fmap_y_len_); - else if (token == "") ReadBasicType(is, false, &filt_x_len_); - else if (token == "") ReadBasicType(is, false, &filt_y_len_); - else if (token == "") ReadBasicType(is, false, &filt_x_step_); - else if (token == "") ReadBasicType(is, false, &filt_y_step_); - else if (token == "") ReadBasicType(is, false, &connect_fmap_); - else if (token == "") ReadBasicType(is, false, &learn_rate_coef_); - else if (token == "") ReadBasicType(is, false, &bias_learn_rate_coef_); - else KALDI_ERR << "Unknown token " << token << ", a typo in config? " - << "(ParamStddev|BiasMean|BiasRange|FmapXLen|FmapYLen|" - "FiltXLen|FiltYLen|FiltXStep|FiltYStep|ConnectFmap|" - "LearnRateCoef|BiasLearnRateCoef)"; - } - - // - // Sanity checks: - // - // input sanity checks - // input_dim_ should be multiple of (fmap_x_len_ * fmap_y_len_) - KALDI_ASSERT(input_dim_ % (fmap_x_len_ * fmap_y_len_) == 0); - int32 num_input_fmaps = input_dim_ / (fmap_x_len_ * fmap_y_len_); - KALDI_LOG << "num_input_fmaps " << num_input_fmaps; - // check if step is in sync with fmap_len and filt_len - KALDI_ASSERT((fmap_x_len_ - filt_x_len_) % (filt_x_step_) == 0); - KALDI_ASSERT((fmap_y_len_ - filt_y_len_) % (filt_y_step_) == 0); - int32 out_fmap_x_len = (fmap_x_len_ - filt_x_len_)/filt_x_step_ + 1; - int32 out_fmap_y_len = (fmap_y_len_ - filt_y_len_)/filt_y_step_ + 1; - // output sanity checks - KALDI_ASSERT(output_dim_ % (out_fmap_x_len * out_fmap_y_len) == 0); - int32 num_output_fmaps = output_dim_ / (out_fmap_x_len * out_fmap_y_len); - KALDI_LOG << "num_output_fmaps " << num_output_fmaps; - int32 num_filters = output_dim_/(out_fmap_x_len*out_fmap_y_len); - KALDI_LOG << "num_filters " << num_filters; - - // - // Initialize trainable parameters, - // - filters_.Resize(num_filters, num_input_fmaps*filt_x_len_*filt_y_len_); - RandGauss(0.0, param_stddev, &filters_); - // - bias_.Resize(num_filters); - RandUniform(bias_mean, bias_range, &bias_); - } - - void ReadData(std::istream &is, bool binary) { - ExpectToken(is, binary, ""); - ReadBasicType(is, binary, &learn_rate_coef_); - ExpectToken(is, binary, ""); - ReadBasicType(is, binary, &bias_learn_rate_coef_); - // convolution hyperparameters - ExpectToken(is, binary, ""); - ReadBasicType(is, binary, &fmap_x_len_); - ExpectToken(is, binary, ""); - ReadBasicType(is, binary, &fmap_y_len_); - ExpectToken(is, binary, ""); - ReadBasicType(is, binary, &filt_x_len_); - ExpectToken(is, binary, ""); - ReadBasicType(is, binary, &filt_y_len_); - ExpectToken(is, binary, ""); - 
ReadBasicType(is, binary, &filt_x_step_); - ExpectToken(is, binary, ""); - ReadBasicType(is, binary, &filt_y_step_); - ExpectToken(is, binary, ""); - ReadBasicType(is, binary, &connect_fmap_); - - // trainable parameters - ExpectToken(is, binary, ""); - filters_.Read(is, binary); - ExpectToken(is, binary, ""); - bias_.Read(is, binary); - - // - // Sanity checks: - // - // input sanity checks - // input_dim_ should be multiple of (fmap_x_len_ * fmap_y_len_) - KALDI_ASSERT(input_dim_ % (fmap_x_len_ * fmap_y_len_) == 0); - // int32 num_input_fmaps = input_dim_ / (fmap_x_len_ * fmap_y_len_); - // KALDI_LOG << "num_input_fmaps " << num_input_fmaps; - // check if step is in sync with fmap_len and filt_len - KALDI_ASSERT((fmap_x_len_ - filt_x_len_) % (filt_x_step_) == 0); - KALDI_ASSERT((fmap_y_len_ - filt_y_len_) % (filt_y_step_) == 0); - int32 out_fmap_x_len = (fmap_x_len_ - filt_x_len_)/filt_x_step_ + 1; - int32 out_fmap_y_len = (fmap_y_len_ - filt_y_len_)/filt_y_step_ + 1; - - // output sanity checks - KALDI_ASSERT(output_dim_ % (out_fmap_x_len * out_fmap_y_len) == 0); - } - - void WriteData(std::ostream &os, bool binary) const { - WriteToken(os, binary, ""); - WriteBasicType(os, binary, learn_rate_coef_); - WriteToken(os, binary, ""); - WriteBasicType(os, binary, bias_learn_rate_coef_); - if (!binary) os << "\n"; - - // convolution hyperparameters - WriteToken(os, binary, ""); - WriteBasicType(os, binary, fmap_x_len_); - WriteToken(os, binary, ""); - WriteBasicType(os, binary, fmap_y_len_); - WriteToken(os, binary, ""); - WriteBasicType(os, binary, filt_x_len_); - WriteToken(os, binary, ""); - WriteBasicType(os, binary, filt_y_len_); - WriteToken(os, binary, ""); - WriteBasicType(os, binary, filt_x_step_); - WriteToken(os, binary, ""); - WriteBasicType(os, binary, filt_y_step_); - WriteToken(os, binary, ""); - WriteBasicType(os, binary, connect_fmap_); - if (!binary) os << "\n"; - - // trainable parameters - WriteToken(os, binary, ""); - if (!binary) os << "\n"; - filters_.Write(os, binary); - WriteToken(os, binary, ""); - if (!binary) os << "\n"; - bias_.Write(os, binary); - } - - int32 NumParams() const { - return filters_.NumRows()*filters_.NumCols() + bias_.Dim(); - } - - void GetGradient(VectorBase* gradient) const { - KALDI_ASSERT(gradient->Dim() == NumParams()); - int32 filters_num_elem = filters_.NumRows() * filters_.NumCols(); - gradient->Range(0, filters_num_elem).CopyRowsFromMat(filters_); - gradient->Range(filters_num_elem, bias_.Dim()).CopyFromVec(bias_); - } - - void GetParams(VectorBase* params) const { - KALDI_ASSERT(params->Dim() == NumParams()); - int32 filters_num_elem = filters_.NumRows() * filters_.NumCols(); - params->Range(0, filters_num_elem).CopyRowsFromMat(filters_); - params->Range(filters_num_elem, bias_.Dim()).CopyFromVec(bias_); - } - - void SetParams(const VectorBase& params) { - KALDI_ASSERT(params.Dim() == NumParams()); - int32 filters_num_elem = filters_.NumRows() * filters_.NumCols(); - filters_.CopyRowsFromVec(params.Range(0, filters_num_elem)); - bias_.CopyFromVec(params.Range(filters_num_elem, bias_.Dim())); - } - - std::string Info() const { - return std::string("\n filters") + MomentStatistics(filters_) + - ", lr-coef " + ToString(learn_rate_coef_) + - "\n bias" + MomentStatistics(bias_) + - ", lr-coef " + ToString(bias_learn_rate_coef_); - } - std::string InfoGradient() const { - return std::string("\n filters_grad") + MomentStatistics(filters_grad_) + - ", lr-coef " + ToString(learn_rate_coef_) + - "\n bias_grad" + MomentStatistics(bias_grad_) + - ", 
lr-coef " + ToString(bias_learn_rate_coef_); - } - - void PropagateFnc(const CuMatrixBase &in, - CuMatrixBase *out) { - // useful dims - int32 num_input_fmaps = input_dim_ / (fmap_x_len_ * fmap_y_len_); - // int32 inp_fmap_size = fmap_x_len_ * fmap_y_len_; - int32 out_fmap_x_len = (fmap_x_len_ - filt_x_len_)/filt_x_step_ + 1; - int32 out_fmap_y_len = (fmap_y_len_ - filt_y_len_)/filt_y_step_ + 1; - int32 out_fmap_size = out_fmap_x_len*out_fmap_y_len; - int32 num_output_fmaps = output_dim_ / (out_fmap_x_len * out_fmap_y_len); - // this is total num_filters, - // so each input_fmap has size num_filters/num_input_fmaps - int32 num_filters = filters_.NumRows(); - KALDI_ASSERT(num_filters == num_output_fmaps); - // int32 filter_size = filt_x_len_*filt_y_len_; - int32 num_frames = in.NumRows(); - - // we will need the buffers - if (vectorized_feature_patches_.size() == 0) { - vectorized_feature_patches_.resize(out_fmap_size); - feature_patch_diffs_.resize(out_fmap_size); - } - - for (int32 p = 0; p < out_fmap_size; p++) { - vectorized_feature_patches_[p].Resize(num_frames, filters_.NumCols()); - } - - // Checked for num_input_fmaps=1, check for num_inp_fmaps>1 - int32 out_fmap_cnt = 0; - for (int32 m = 0; m < fmap_x_len_-filt_x_len_+1; m = m+filt_x_step_) { - for (int32 n = 0; n < fmap_y_len_-filt_y_len_+1; n = n+filt_y_step_) { - std::vector column_mask; - int32 st = 0; - if (connect_fmap_ == 1) { - st = (m * fmap_y_len_ + n) * num_input_fmaps; - } else { - st = m * fmap_y_len_ * num_input_fmaps + n; - } - - for (int32 i = 0; i < filt_x_len_; i++) { - for (int32 j = 0; j < filt_y_len_*num_input_fmaps; j++) { - int32 c = 0; - if (connect_fmap_ == 1) { - c = st + i * (num_input_fmaps*fmap_y_len_) + j; - } else { - c = st + i * (num_input_fmaps * fmap_y_len_) - + (j / num_input_fmaps) - + (j % num_input_fmaps) * fmap_y_len_; - } - column_mask.push_back(c); - } - } - CuArray cu_column_mask(column_mask); - vectorized_feature_patches_[out_fmap_cnt].CopyCols(in, cu_column_mask); - out_fmap_cnt++; - } - } - - for (int32 p = 0; p < out_fmap_size; p++) { - CuSubMatrix tgt(out->ColRange(p*num_filters, num_filters)); - tgt.AddVecToRows(1.0, bias_, 0.0); - tgt.AddMatMat(1.0, vectorized_feature_patches_[p], kNoTrans, filters_, kTrans, 1.0); - } - } - - - void BackpropagateFnc(const CuMatrixBase &in, - const CuMatrixBase &out, - const CuMatrixBase &out_diff, - CuMatrixBase *in_diff) { - // useful dims - int32 num_input_fmaps = input_dim_ / (fmap_x_len_ * fmap_y_len_); - - int32 out_fmap_x_len = (fmap_x_len_ - filt_x_len_)/filt_x_step_ + 1; - int32 out_fmap_y_len = (fmap_y_len_ - filt_y_len_)/filt_y_step_ + 1; - int32 out_fmap_size = out_fmap_x_len * out_fmap_y_len; - int32 num_output_fmaps = output_dim_ / (out_fmap_x_len * out_fmap_y_len); - // this is total num_filters, - // so each input_fmap has num_filters/num_input_fmaps - int32 num_filters = filters_.NumRows(); - KALDI_ASSERT(num_filters == num_output_fmaps); - // int32 filter_size = filt_x_len_*filt_y_len_; - int32 num_frames = in.NumRows(); - - for (int32 p = 0; p < out_fmap_size; p++) { - feature_patch_diffs_[p].Resize(num_frames, filters_.NumCols(), kSetZero); - CuSubMatrix out_diff_patch(out_diff.ColRange(p*num_filters, num_filters)); - feature_patch_diffs_[p].AddMatMat(1.0, out_diff_patch, kNoTrans, filters_, kNoTrans, 0.0); - } - - // compute in_diff_summands_ once - if (in_diff_summands_.Dim() == 0) { - in_diff_summands_.Resize(in_diff->NumCols(), kSetZero); - for (int32 m = 0; m < fmap_x_len_-filt_x_len_+1; m = m+filt_x_step_) { - for (int32 n = 
0; n < fmap_y_len_-filt_y_len_+1; n = n+filt_y_step_) { - int32 st = 0; - if (connect_fmap_ == 1) { - st = (m * fmap_y_len_ + n) * num_input_fmaps; - } else { - st = m * fmap_y_len_ * num_input_fmaps + n; - } - for (int32 i = 0; i < filt_x_len_; i++) { - for (int32 j = 0; j < filt_y_len_*num_input_fmaps; j++) { - int32 c = 0; - if (connect_fmap_ == 1) { - c = st + i * (num_input_fmaps * fmap_y_len_) + j; - } else { - c = st + i * (num_input_fmaps * fmap_y_len_) - + (j / num_input_fmaps) - + (j % num_input_fmaps) * fmap_y_len_; - } - // add 1.0 - in_diff_summands_.Range(c, 1).Add(1.0); - } - } - } - } - in_diff_summands_.InvertElements(); - } - - int32 out_fmap_cnt = 0; - - for (int32 m = 0; m < fmap_x_len_-filt_x_len_+1; m = m+filt_x_step_) { - for (int32 n = 0; n< fmap_y_len_-filt_y_len_+1; n = n+filt_y_step_) { - int32 st = 0; - if (connect_fmap_ == 1) { - st = (m * fmap_y_len_ + n) * num_input_fmaps; - } else { - st = m * fmap_y_len_ * num_input_fmaps + n; - } - - for (int32 i = 0; i < filt_x_len_; i++) { - for (int32 j = 0; j < filt_y_len_*num_input_fmaps; j++) { - int32 c = 0; - if (connect_fmap_ == 1) { - c = st + i *(num_input_fmaps*fmap_y_len_)+j; - } else { - c = st + i * (num_input_fmaps * fmap_y_len_) - + (j / num_input_fmaps) - + (j % num_input_fmaps) * fmap_y_len_; - } - // from which col? - CuMatrix& diff_mat = feature_patch_diffs_[out_fmap_cnt]; - CuSubMatrix src(diff_mat.ColRange(i*filt_y_len_*num_input_fmaps+j, 1)); - // to which col? - CuSubMatrix tgt(in_diff->ColRange(c, 1)); - tgt.AddMat(1.0, src); - } - } - out_fmap_cnt++; - } - } - // compensate for summands - in_diff->MulColsVec(in_diff_summands_); - } - - - void Update(const CuMatrixBase &input, - const CuMatrixBase &diff) { - // useful dims, - int32 out_fmap_x_len = (fmap_x_len_ - filt_x_len_)/filt_x_step_ + 1; - int32 out_fmap_y_len = (fmap_y_len_ - filt_y_len_)/filt_y_step_ + 1; - int32 out_fmap_size = out_fmap_x_len * out_fmap_y_len; - int32 num_output_fmaps = output_dim_ / (out_fmap_x_len * out_fmap_y_len); - - // This is total num_filters, - // each input_fmap has num_filters / num_input_fmaps: - int32 num_filters = filters_.NumRows(); - KALDI_ASSERT(num_filters == num_output_fmaps); - - // we use following hyperparameters from the option class, - const BaseFloat lr = opts_.learn_rate; - - // - // calculate the gradient - // - filters_grad_.Resize(filters_.NumRows(), filters_.NumCols(), kSetZero); - bias_grad_.Resize(filters_.NumRows(), kSetZero); - // - for (int32 p = 0; p < out_fmap_size; p++) { - CuSubMatrix diff_patch(diff.ColRange(p * num_filters, num_filters)); - filters_grad_.AddMatMat(1.0, diff_patch, kTrans, vectorized_feature_patches_[p], kNoTrans, 1.0); - bias_grad_.AddRowSumMat(1.0, diff_patch, 1.0); - } - // scale - filters_grad_.Scale(1.0/num_output_fmaps); - bias_grad_.Scale(1.0/num_output_fmaps); - - // - // update - // - filters_.AddMat(-lr * learn_rate_coef_, filters_grad_); - bias_.AddVec(-lr * bias_learn_rate_coef_, bias_grad_); - } - - private: - /// feature maps dimensions (for input x_ is usually splice - /// and y_ is num of fbanks) shift for 2nd dim of a patch - /// (i.e. 
frame length before splicing), - int32 fmap_x_len_, fmap_y_len_; - - /// 2D filter dimensions, x_ temporal, y_ spectral, - int32 filt_x_len_, filt_y_len_; - - /// 2D shifts along temporal and spectral axis, - int32 filt_x_step_, filt_y_step_; - - int32 connect_fmap_; ///< if connect_fmap_ = 1, then each fmap has num_filt - - CuMatrix filters_; ///< row = vectorized rectangular filter - CuVector bias_; ///< bias for each filter - - CuMatrix filters_grad_; ///< gradient of filters - CuVector bias_grad_; ///< gradient of biases - - /** Buffer of reshaped inputs: - * 1row = vectorized rectangular feature patch, - * 1col = dim over speech frames, - * std::vector-dim = patch-position - */ - std::vector > vectorized_feature_patches_; - - /** Buffer for backpropagation: - * derivatives in the domain of 'vectorized_feature_patches_', - * 1row = vectorized rectangular feature patch, - * 1col = dim over speech frames, - * std::vector-dim = patch-position - */ - std::vector > feature_patch_diffs_; - - /// Auxiliary vector for compensating #summands when backpropagating - CuVector in_diff_summands_; -}; - -} // namespace nnet1 -} // namespace kaldi - -#endif // KALDI_NNET_NNET_CONVOLUTIONAL_2D_COMPONENT_H_ diff --git a/src/nnet/nnet-max-pooling-2d-component.h b/src/nnet/nnet-max-pooling-2d-component.h deleted file mode 100644 index 4a4045ca73d..00000000000 --- a/src/nnet/nnet-max-pooling-2d-component.h +++ /dev/null @@ -1,225 +0,0 @@ -// nnet/nnet-max-pooling-2d-component.h - -// Copyright 2014 Brno University of Technology (author: Karel Vesely), -// Johns Hopkins University (author: Sri Harish Mallidi) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#ifndef KALDI_NNET_NNET_MAX_POOLING_2D_COMPONENT_H_ -#define KALDI_NNET_NNET_MAX_POOLING_2D_COMPONENT_H_ - -#include -#include - -#include "nnet/nnet-component.h" -#include "nnet/nnet-utils.h" -#include "cudamatrix/cu-math.h" - -namespace kaldi { -namespace nnet1 { - -/** - * MaxPoolingComponent : - * The input/output matrices are split to submatrices with width 'pool_stride_'. - * The pooling is done over 3rd axis, of the set of 2d matrices. - * Our pooling supports overlaps, overlaps occur when (pool_step_ < pool_size_). 
- */ -class MaxPooling2DComponent : public Component { - public: - MaxPooling2DComponent(int32 dim_in, int32 dim_out): - Component(dim_in, dim_out), - fmap_x_len_(0), fmap_y_len_(0), - pool_x_len_(0), pool_y_len_(0), - pool_x_step_(0), pool_y_step_(0) - { } - - ~MaxPooling2DComponent() - { } - - Component* Copy() const { return new MaxPooling2DComponent(*this); } - ComponentType GetType() const { return kMaxPooling2DComponent; } - - void InitData(std::istream &is) { - // parse config - std::string token; - while (is >> std::ws, !is.eof()) { - ReadToken(is, false, &token); - /**/ if (token == "") ReadBasicType(is, false, &fmap_x_len_); - else if (token == "") ReadBasicType(is, false, &fmap_y_len_); - else if (token == "") ReadBasicType(is, false, &pool_x_len_); - else if (token == "") ReadBasicType(is, false, &pool_y_len_); - else if (token == "") ReadBasicType(is, false, &pool_x_step_); - else if (token == "") ReadBasicType(is, false, &pool_y_step_); - else KALDI_ERR << "Unknown token " << token << ", a typo in config?" - << " (FmapXLen|FmapYLen|PoolXLen|PoolYLen|PoolXStep|PoolYStep)"; - } - // check - KALDI_ASSERT(fmap_x_len_ * fmap_y_len_ != 0); - KALDI_ASSERT(pool_x_len_ * pool_y_len_ != 0); - KALDI_ASSERT(pool_x_step_ * pool_y_step_ != 0); - } - - void ReadData(std::istream &is, bool binary) { - // pooling hyperparameters - ExpectToken(is, binary, ""); - ReadBasicType(is, binary, &fmap_x_len_); - ExpectToken(is, binary, ""); - ReadBasicType(is, binary, &fmap_y_len_); - ExpectToken(is, binary, ""); - ReadBasicType(is, binary, &pool_x_len_); - ExpectToken(is, binary, ""); - ReadBasicType(is, binary, &pool_y_len_); - ExpectToken(is, binary, ""); - ReadBasicType(is, binary, &pool_x_step_); - ExpectToken(is, binary, ""); - ReadBasicType(is, binary, &pool_y_step_); - - // - // Sanity checks: - // - // input sanity checks - // input_dim_ should be multiple of (fmap_x_len_ * fmap_y_len_) - KALDI_ASSERT(input_dim_ % (fmap_x_len_ * fmap_y_len_) == 0); - int32 num_input_fmaps = input_dim_ / (fmap_x_len_ * fmap_y_len_); - KALDI_LOG << "num_fmaps " << num_input_fmaps; - // check if step is in sync with fmap_len and filt_len - KALDI_ASSERT((fmap_x_len_ - pool_x_len_) % (pool_x_step_) == 0); - KALDI_ASSERT((fmap_y_len_ - pool_y_len_) % (pool_y_step_) == 0); - int32 out_fmap_x_len = (fmap_x_len_ - pool_x_len_)/pool_x_step_ + 1; - int32 out_fmap_y_len = (fmap_y_len_ - pool_y_len_)/pool_y_step_ + 1; - // int32 out_fmap_size = out_fmap_x_len*out_fmap_y_len; - // output sanity checks - KALDI_ASSERT(output_dim_ % (out_fmap_x_len * out_fmap_y_len) == 0); - int32 num_output_fmaps = output_dim_ / (out_fmap_x_len * out_fmap_y_len); - KALDI_ASSERT(num_input_fmaps == num_output_fmaps); - } - - void WriteData(std::ostream &os, bool binary) const { - // pooling hyperparameters - WriteToken(os, binary, ""); - WriteBasicType(os, binary, fmap_x_len_); - WriteToken(os, binary, ""); - WriteBasicType(os, binary, fmap_y_len_); - WriteToken(os, binary, ""); - WriteBasicType(os, binary, pool_x_len_); - WriteToken(os, binary, ""); - WriteBasicType(os, binary, pool_y_len_); - WriteToken(os, binary, ""); - WriteBasicType(os, binary, pool_x_step_); - WriteToken(os, binary, ""); - WriteBasicType(os, binary, pool_y_step_); - } - - void PropagateFnc(const CuMatrixBase &in, - CuMatrixBase *out) { - // useful dims - int32 num_input_fmaps = input_dim_ / (fmap_x_len_ * fmap_y_len_); - int out_fmap_cnt = 0; - for (int32 m = 0; m < fmap_x_len_-pool_x_len_+1; m = m+pool_x_step_) { - for (int32 n = 0; n < fmap_y_len_-pool_y_len_+1; n = 
n+pool_y_step_) { - int32 st = 0; - st = (m * fmap_y_len_ + n) * num_input_fmaps; - CuSubMatrix pool( - out->ColRange(out_fmap_cnt * num_input_fmaps, num_input_fmaps) - ); - pool.Set(-1e20); // reset (large neg value) - for (int32 i = 0; i < pool_x_len_; i++) { - for (int32 j = 0; j < pool_y_len_; j++) { - int32 c = 0; - c = st + i * (num_input_fmaps * fmap_y_len_) - + j * num_input_fmaps; - pool.Max(in.ColRange(c, num_input_fmaps)); - } - } - out_fmap_cnt++; - } - } - } - - void BackpropagateFnc(const CuMatrixBase &in, - const CuMatrixBase &out, - const CuMatrixBase &out_diff, - CuMatrixBase *in_diff) { - // useful dims - int32 num_input_fmaps = input_dim_ / (fmap_x_len_ * fmap_y_len_); - int32 inp_fmap_size = fmap_x_len_ * fmap_y_len_; - - // - // here we note how many diff matrices are summed for each input patch, - std::vector patch_summands(inp_fmap_size, 0); - // this metainfo will be used to divide diff of patches - // used in more than one pool. - // - - in_diff->SetZero(); // reset - - int out_fmap_cnt = 0; - for (int32 m = 0; m < fmap_x_len_-pool_x_len_+1; m = m+pool_x_step_) { - for (int32 n = 0; n < fmap_y_len_-pool_y_len_+1; n = n+pool_y_step_) { - int32 st = 0; - st = (m*fmap_y_len_+n)*num_input_fmaps; - - for (int32 i = 0; i < pool_x_len_; i++) { - for (int32 j = 0; j < pool_y_len_; j++) { - int32 c = 0; - c = st + i * (num_input_fmaps * fmap_y_len_) - + j * num_input_fmaps; - // - CuSubMatrix in_p(in.ColRange(c, num_input_fmaps)); - CuSubMatrix out_p( - out.ColRange(out_fmap_cnt*num_input_fmaps, num_input_fmaps) - ); - // - - CuSubMatrix tgt(in_diff->ColRange(c, num_input_fmaps)); - CuMatrix src( - out_diff.ColRange(out_fmap_cnt*num_input_fmaps, num_input_fmaps) - ); - - CuMatrix mask; - in_p.EqualElementMask(out_p, &mask); - src.MulElements(mask); - tgt.AddMat(1.0, src); - - patch_summands[c/num_input_fmaps] += 1; - } - } - out_fmap_cnt++; - } - } - - // divide diff by #summands (compensate for patches used in more pools), - for (int i = 0; i < fmap_x_len_; i++) { - for (int32 j = 0; j < fmap_y_len_; j++) { - int32 c = i * fmap_y_len_ + j; - CuSubMatrix tgt(in_diff->ColRange(c * num_input_fmaps, num_input_fmaps)); - KALDI_ASSERT(patch_summands[c] > 0); // patch at least in one pool - tgt.Scale(1.0 / patch_summands[c]); - } - } - } - - private: - int32 fmap_x_len_, fmap_y_len_, - pool_x_len_, pool_y_len_, - pool_x_step_, pool_y_step_; -}; - -} // namespace nnet1 -} // namespace kaldi - -#endif // KALDI_NNET_NNET_MAX_POOLING_2D_COMPONENT_H_ From 9c875ef5b0cda85a707a1c876215154300e5b1f1 Mon Sep 17 00:00:00 2001 From: Karel Vesely Date: Thu, 14 Mar 2019 17:24:58 +0100 Subject: [PATCH 081/235] [doc] Small documentation fixes; update on Kaldi history (#3031) --- src/doc/dnn1.dox | 16 ++++++++-------- src/doc/history.dox | 10 +++++++--- src/nnetbin/cuda-gpu-available.cc | 11 +++++++++++ 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/src/doc/dnn1.dox b/src/doc/dnn1.dox index 223b7665274..e8dcfd90d3f 100644 --- a/src/doc/dnn1.dox +++ b/src/doc/dnn1.dox @@ -35,13 +35,13 @@ show some \ref dnn1_advanced_features, and do a light introduction to the \ref d
\section dnn1_toplevel_scripts Top-level script -Let's have a look at the script egs/wsj/s5/local/nnet/run_dnn.sh. +Let's have a look at the script egs/wsj/s5/local/nnet/run_dnn.sh. This script assumes to use a single CUDA GPU, and that kaldi was compiled with CUDA (check for 'CUDA = true' in src/kaldi.mk). Also we assume that 'cuda_cmd' is set properly in egs/wsj/s5/cmd.sh either to a GPU cluster node using 'queue.pl' or to a local machine using 'run.pl'. And finally the script assumes we already have a SAT GMM system exp/tri4b and corresponding fMLLR transforms, as generated by egs/wsj/s5/run.sh. Note that for other databases the run_dnn.sh is typically in the same location s5/local/nnet/run_dnn.sh. -The script egs/wsj/s5/local/nnet/run_dnn.sh is split into several stages: +The script egs/wsj/s5/local/nnet/run_dnn.sh is split into several stages: 0. storing 40-dimensional fMLLR features to disk, steps/nnet/make_fmllr_feats.sh, this simplifies the training scripts, the 40-dimensional features are MFCC-LDA-MLLT-fMLLR with CMN @@ -100,7 +100,7 @@ Besides the DNN recipe, there are also other example scripts which can be handy:
\section dnn1_training_script_internals Training script internals -The main neural network training script steps/nnet/train.sh is invoked as: +The main neural network training script steps/nnet/train.sh is invoked as: \verbatim steps/nnet/train.sh @@ -111,11 +111,11 @@ The is used only in the special case when using LDA feature-transform The output (i.e. the trained networks and logfiles) goes into . Internally the script prepares the feature+target pipelines, generates a neural-network prototype and initialization, creates feature_transform and calls the scheduler script -steps/nnet/train_scheduler.sh, +steps/nnet/train_scheduler.sh, which runs the training epochs and controls the learning rate. -While looking inside steps/nnet/train.sh we see: +While looking inside steps/nnet/train.sh we see: 1. CUDA is required, the scripts exit if no GPU was detected or was CUDA not compiled in (one can still use '--skip-cuda-check true' to run on CPU, but it is 10-20x slower) @@ -165,12 +165,12 @@ $ cat exp/dnn5b_pretrain-dbn_dnn/nnet.proto 7. the network is initialized by : \ref nnet-initialize.cc , the DBN gets prepended in the next step using \ref nnet-concat.cc -8. finally the training gets called by running scheduler script steps/nnet/train_scheduler.sh +8. finally the training gets called by running scheduler script steps/nnet/train_scheduler.sh Note : both neural networks and feature transforms can be viewed by \ref nnet-info.cc, or shown in ascii by \ref nnet-copy.cc -While looking inside steps/nnet/train_scheduler.sh we see: +While looking inside steps/nnet/train_scheduler.sh we see: the initial cross-validation run and the main for-loop over $iter which runs the epochs and controls the learning rate. Typically, the train_scheduler.sh is called from train.sh. - the default learning-rate scheduling is based on the relative improvement of the objective function: @@ -310,7 +310,7 @@ AddMat 174.307s AddMatMat 1922.11s \endverbatim - Running steps/nnet/train_scheduler.sh directly: + Running steps/nnet/train_scheduler.sh directly: - The script train_scheduler.sh can be called outside train.sh, it allows to override the default NN-input and NN-target streams, which can be handy. - However the script assumes everything is set-up correctly, and there are almost no sanity checks, which makes it suitable for more advanced users only. - It is highly recommended to have a look at how train_scheduler.sh is usually called before trying to call it directly. diff --git a/src/doc/history.dox b/src/doc/history.dox index 40d46c7e32f..0813f2331cc 100644 --- a/src/doc/history.dox +++ b/src/doc/history.dox @@ -54,7 +54,8 @@ Sandeep Boda, Sandeep Reddy and Haihua Xu (who helped with coding, code cleanup and documentation); we were visited by Michael Riley (who helped us to understand OpenFst and gave some lectures on FSTs), and would like to acknowledge the help of - Honza Cernocky (for allowing us to have the workshop and helping to organize it), + Honza Cernocky (for negotiating the venue and some support for the workshop from + the Faculty of Information Technology of BUT and helping to organize it), Renata Kohlova (administration), and Tomas Kasparek (system administration). It is possible that this list of contributors contains oversights; any important omissions are unlikely to be intentional. @@ -62,13 +63,16 @@ A lot of code was written during the summer of 2010 but we still did not have a complete working system. 
Some of the participants of the 2010 workshop continued working to complete the toolkit and get a working set of training scripts. - The code was released on May 14th, 2011. + The code was released on May 14th, 2011, and presented to public at ICASSP 2011 + in Prague, + + see the recordings. Since the initial release, Kaldi has been maintained and developed to a large extent by Daniel Povey, working at Microsoft Research until early 2012 and since then at Johns Hopkins University; but also with major contributions by others: notably Karel Vesely, who developed the neural-net training framework, - and Arnab Ghoshal, who co-ordinated the acoustic modeling work early on; but + and Arnab Ghoshal, who coordinated the acoustic modeling work early on; but also other major contributors whom we do not name here because it is too hard to determine where to cut off the list; and a long tail of minor contributors; the total number of people who have contributed code or scripts or patches is diff --git a/src/nnetbin/cuda-gpu-available.cc b/src/nnetbin/cuda-gpu-available.cc index 41d0227ce08..69637d3601a 100644 --- a/src/nnetbin/cuda-gpu-available.cc +++ b/src/nnetbin/cuda-gpu-available.cc @@ -40,6 +40,17 @@ void TestGpuComputation() { #endif int main(int argc, char *argv[]) try { + + /* only for Doxygen documentation, never shown in command line */ + const char *usage = + "Test if there is a GPU available, and if the GPU setup is correct.\n" + "A GPU is acquired and a small computation is done\n" + "(generating a random matrix and computing softmax for its rows).\n" + "\n" + "exit-code: 0 = success, 1 = compiled without GPU support, -1 = error\n" + "\n" + "Usage: cuda-gpu-available\n"; + char hostname[100] = "UNKNOWN-HOSTNAME"; #if !defined(_MSC_VER) && !defined(__CYGWIN__) if (gethostname(hostname, 100)) { From 7a1908f1a12bdabb969ea43988660cfd0add63bc Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 14 Mar 2019 20:34:45 -0400 Subject: [PATCH 082/235] [src] Various mostly-cosmetic changes (copying from another branch) (#3109) --- .../libs/nnet3/train/dropout_schedule.py | 20 +- egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py | 1 + egs/wsj/s5/steps/nnet3/xconfig_to_config.py | 106 +++++ src/base/io-funcs.h | 2 +- src/matrix/kaldi-matrix.h | 4 + src/nnet2bin/nnet-am-compute.cc | 4 +- src/nnet3/nnet-chain-training.cc | 56 +-- src/nnet3/nnet-chain-training.h | 14 +- src/nnet3/nnet-parse-test.cc | 189 --------- src/nnet3/nnet-parse.cc | 373 ------------------ src/nnet3/nnet-parse.h | 123 ------ src/nnet3/nnet-training.cc | 47 +-- src/nnet3/nnet-training.h | 8 +- src/nnet3/nnet-utils.cc | 43 +- src/nnet3/nnet-utils.h | 29 ++ src/util/text-utils-test.cc | 190 +++++++++ src/util/text-utils.cc | 248 ++++++++++++ src/util/text-utils.h | 92 +++++ 18 files changed, 759 insertions(+), 790 deletions(-) create mode 100755 egs/wsj/s5/steps/nnet3/xconfig_to_config.py diff --git a/egs/wsj/s5/steps/libs/nnet3/train/dropout_schedule.py b/egs/wsj/s5/steps/libs/nnet3/train/dropout_schedule.py index 0ad93e5977d..0de9074517f 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/dropout_schedule.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/dropout_schedule.py @@ -186,9 +186,22 @@ def _get_component_dropout(dropout_schedule, data_fraction): def _get_dropout_proportions(dropout_schedule, data_fraction): """Returns dropout proportions based on the dropout_schedule for the - fraction of data seen at this stage of training. + fraction of data seen at this stage of training. 
Returns a list of + pairs (pattern, dropout_proportion); for instance, it might return + the list ['*', 0.625] meaning a dropout proportion of 0.625 is to + be applied to all dropout components. + Returns None if dropout_schedule is None. + dropout_schedule might be (in the sample case using the default pattern of + '*'): '0.1,0.5@0.5,0.1', meaning a piecewise linear function that starts at + 0.1 when data_fraction=0.0, rises to 0.5 when data_fraction=0.5, and falls + again to 0.1 when data_fraction=1.0. It can also contain space-separated + items of the form 'pattern=schedule', for instance: + '*=0.0,0.5,0.0 lstm.*=0.0,0.3@0.75,0.0' + The more specific patterns should go later, otherwise they will be overridden + by the less specific patterns' commands. + Calls _get_component_dropout() for the different component name patterns in dropout_schedule. @@ -198,6 +211,7 @@ def _get_dropout_proportions(dropout_schedule, data_fraction): See _self_test() for examples. data_fraction: The fraction of data seen until this stage of training. + """ if dropout_schedule is None: return None @@ -213,6 +227,10 @@ def _get_dropout_proportions(dropout_schedule, data_fraction): def get_dropout_edit_string(dropout_schedule, data_fraction, iter_): """Return an nnet3-copy --edits line to modify raw_model_string to set dropout proportions according to dropout_proportions. + E.g. if _dropout_proportions(dropout_schedule, data_fraction) + returns [('*', 0.625)], this will return the string: + "nnet3-copy --edits='set-dropout-proportion name=* proportion=0.625'" + Arguments: dropout_schedule: Value for the --trainer.dropout-schedule option. diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py index 5ac2ed59003..b540423e3cd 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py @@ -27,6 +27,7 @@ 'relu-batchnorm-layer' : xlayers.XconfigBasicLayer, 'relu-batchnorm-so-layer' : xlayers.XconfigBasicLayer, 'batchnorm-so-relu-layer' : xlayers.XconfigBasicLayer, + 'batchnorm-layer' : xlayers.XconfigBasicLayer, 'sigmoid-layer' : xlayers.XconfigBasicLayer, 'tanh-layer' : xlayers.XconfigBasicLayer, 'fixed-affine-layer' : xlayers.XconfigFixedAffineLayer, diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_config.py b/egs/wsj/s5/steps/nnet3/xconfig_to_config.py new file mode 100755 index 00000000000..952745cea9f --- /dev/null +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_config.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python3 + +# Copyright 2016-2018 Johns Hopkins University (Dan Povey) +# 2016 Vijayaditya Peddinti +# 2017 Google Inc. (vpeddinti@google.com) +# Apache 2.0. + +# This is like xconfig_to_configs.py but with a simpler interface; it writes +# to a single named file. + + +import argparse +import os +import sys +from collections import defaultdict + +sys.path.insert(0, 'steps/') +# the following is in case we weren't running this from the normal directory. 
+sys.path.insert(0, os.path.realpath(os.path.dirname(sys.argv[0])) + '/') + +import libs.nnet3.xconfig.parser as xparser +import libs.common as common_lib + + +def get_args(): + # we add compulsory arguments as named arguments for readability + parser = argparse.ArgumentParser( + description="Reads an xconfig file and creates config files " + "for neural net creation and training", + epilog='Search egs/*/*/local/{nnet3,chain}/*sh for examples') + parser.add_argument('--xconfig-file', required=True, + help='Filename of input xconfig file') + parser.add_argument('--existing-model', + help='Filename of previously trained neural net ' + '(e.g. final.mdl) which is useful in case of ' + 'using nodes from list of component-nodes in ' + 'already trained model ' + 'to generate new config file for new model.' + 'The context info is also generated using ' + 'a model generated by adding final.config ' + 'to the existing model.' + 'e.g. In Transfer learning: generate new model using ' + 'component nodes in existing model.') + parser.add_argument('--config-file-out', required=True, + help='Filename to write nnet config file.'); + parser.add_argument('--nnet-edits', type=str, default=None, + action=common_lib.NullstrToNoneAction, + help="""This option is useful in case the network you + are creating does not have an output node called + 'output' (e.g. for multilingual setups). You can set + this to an edit-string like: 'rename-node old-name=xxx + new-name=output' if node xxx plays the role of the + output node in this network. This is only used for + computing the left/right context.""") + + print(' '.join(sys.argv), file=sys.stderr) + + args = parser.parse_args() + + return args + + + +def write_config_file(config_file_out, all_layers): + # config_basename_to_lines is map from the basename of the + # config, as a string (i.e. 'ref', 'all', 'init') to a list of + # strings representing lines to put in the config file. + config_basename_to_lines = defaultdict(list) + + for layer in all_layers: + try: + pairs = layer.get_full_config() + for config_basename, line in pairs: + config_basename_to_lines[config_basename].append(line) + except Exception as e: + print("{0}: error producing config lines from xconfig " + "line '{1}': error was: {2}".format(sys.argv[0], + str(layer), repr(e)), + file=sys.stderr) + # we use raise rather than raise(e) as using a blank raise + # preserves the backtrace + raise + + with open(config_file_out, 'w') as f: + print('# This file was created by the command:\n' + '# {0} '.format(sys.argv), file=f) + lines = config_basename_to_lines['final'] + for line in lines: + print(line, file=f) + + +def main(): + args = get_args() + existing_layers = [] + if args.existing_model is not None: + existing_layers = xparser.get_model_component_info(args.existing_model) + all_layers = xparser.read_xconfig_file(args.xconfig_file, existing_layers) + write_config_file(args.config_file_out, all_layers) + + +if __name__ == '__main__': + main() + + +# test: +# (echo 'input dim=40 name=input'; echo 'output name=output input=Append(-1,0,1)') >xconfig; steps/nnet3/xconfig_to_config.py --xconfig-file=xconfig --config-file-out=foo diff --git a/src/base/io-funcs.h b/src/base/io-funcs.h index b3015905785..6396967f56b 100644 --- a/src/base/io-funcs.h +++ b/src/base/io-funcs.h @@ -108,7 +108,7 @@ namespace kaldi { it doesn't throw. It's useful if a class can have various forms based on typedefs and virtual classes, and wants to know which version to read. - ReadToken allow the caller to obtain the next token. 
PeekToken works just + ReadToken allows the caller to obtain the next token. PeekToken works just like ReadToken, but seeks back to the beginning of the token. A subsequent call to ReadToken will read the same token again. This is useful when different object types are written to the same file; using PeekToken one can diff --git a/src/matrix/kaldi-matrix.h b/src/matrix/kaldi-matrix.h index 11a5e08b15d..d7ee8eb388f 100644 --- a/src/matrix/kaldi-matrix.h +++ b/src/matrix/kaldi-matrix.h @@ -531,6 +531,10 @@ class MatrixBase { * positive semi-definite (check_thresh controls how stringent the check is; * set it to 2 to ensure it won't ever complain, but it will zero out negative * dimensions in your matrix. + * + * Caution: if you want the eigenvalues, it may make more sense to convert to + * SpMatrix and use Eig() function there, which uses eigenvalue decomposition + * directly rather than SVD. */ void SymPosSemiDefEig(VectorBase *s, MatrixBase *P, Real check_thresh = 0.001); diff --git a/src/nnet2bin/nnet-am-compute.cc b/src/nnet2bin/nnet-am-compute.cc index fe07f9c6a25..2b50f7cc656 100644 --- a/src/nnet2bin/nnet-am-compute.cc +++ b/src/nnet2bin/nnet-am-compute.cc @@ -94,7 +94,7 @@ int main(int argc, char *argv[]) { int64 num_done = 0, num_frames = 0; Vector inv_priors(am_nnet.Priors()); - KALDI_ASSERT(!divide_by_priors || inv_priors.Dim() == am_nnet.NumPdfs() && + KALDI_ASSERT((!divide_by_priors || inv_priors.Dim() == am_nnet.NumPdfs()) && "Priors in neural network not set up."); inv_priors.ApplyPow(-1.0); @@ -159,5 +159,3 @@ int main(int argc, char *argv[]) { return -1; } } - - diff --git a/src/nnet3/nnet-chain-training.cc b/src/nnet3/nnet-chain-training.cc index cccb1110d3c..d9562887817 100644 --- a/src/nnet3/nnet-chain-training.cc +++ b/src/nnet3/nnet-chain-training.cc @@ -33,6 +33,7 @@ NnetChainTrainer::NnetChainTrainer(const NnetChainTrainingOptions &opts, compiler_(*nnet, opts_.nnet_config.optimize_config, opts_.nnet_config.compiler_config), num_minibatches_processed_(0), + max_change_stats_(*nnet), srand_seed_(RandInt(0, 100000)) { if (opts.nnet_config.zero_component_stats) ZeroComponentStats(nnet); @@ -41,9 +42,6 @@ NnetChainTrainer::NnetChainTrainer(const NnetChainTrainingOptions &opts, opts.nnet_config.backstitch_training_interval > 0); delta_nnet_ = nnet_->Copy(); ScaleNnet(0.0, delta_nnet_); - const int32 num_updatable = NumUpdatableComponents(*delta_nnet_); - num_max_change_per_component_applied_.resize(num_updatable, 0); - num_max_change_global_applied_ = 0; if (opts.nnet_config.read_cache != "") { bool binary; @@ -111,17 +109,19 @@ void NnetChainTrainer::TrainInternal(const NnetChainExample &eg, this->ProcessOutputs(false, eg, &computer); computer.Run(); - // If relevant, add in the part of the gradient that comes from L2 - // regularization. + // If relevant, add in the part of the gradient that comes from + // parameter-level L2 regularization. ApplyL2Regularization(*nnet_, GetNumNvalues(eg.inputs, false) * nnet_config.l2_regularize_factor, delta_nnet_); // Updates the parameters of nnet - bool success = UpdateNnetWithMaxChange(*delta_nnet_, - nnet_config.max_param_change, 1.0, 1.0 - nnet_config.momentum, nnet_, - &num_max_change_per_component_applied_, &num_max_change_global_applied_); + bool success = UpdateNnetWithMaxChange( + *delta_nnet_, + nnet_config.max_param_change, + 1.0, 1.0 - nnet_config.momentum, nnet_, + &max_change_stats_); // Scale down the batchnorm stats (keeps them fresh... this affects what // happens when we use the model with batchnorm test-mode set). 
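For context, the hunk above replaces the hand-maintained max-change counters with a MaxChangeStats object, but the underlying mechanism is unchanged: the accumulated parameter change in delta_nnet_ is scaled down whenever its 2-norm exceeds a per-component or a global limit before being added to the model. The following is a minimal sketch of that idea only; the plain std::vector types and function names are illustrative assumptions, not the actual Kaldi interfaces (the real UpdateNnetWithMaxChange() works on Nnet objects and also records how often each limit fires).

#include <cmath>
#include <vector>

// 2-norm of one component's proposed parameter change.
static float L2Norm(const std::vector<float> &v) {
  double sum = 0.0;
  for (float x : v) sum += static_cast<double>(x) * x;
  return static_cast<float>(std::sqrt(sum));
}

// 'deltas' holds the proposed change for each updatable component;
// 'per_component_limit' and 'global_limit' play the roles of the
// per-component max-change and --trainer.max-param-change.
void ApplyMaxChangeSketch(std::vector<std::vector<float> > *deltas,
                          float per_component_limit, float global_limit) {
  double global_sumsq = 0.0;
  for (std::vector<float> &d : *deltas) {
    float norm = L2Norm(d), scale = 1.0f;
    if (per_component_limit > 0.0f && norm > per_component_limit)
      scale = per_component_limit / norm;       // per-component clipping
    for (float &x : d) x *= scale;
    global_sumsq += static_cast<double>(norm) * scale * norm * scale;
  }
  float global_norm = static_cast<float>(std::sqrt(global_sumsq));
  if (global_limit > 0.0f && global_norm > global_limit) {
    float scale = global_limit / global_norm;   // global clipping
    for (std::vector<float> &d : *deltas)
      for (float &x : d) x *= scale;
  }
  // the (possibly scaled) deltas are then what gets added to the parameters.
}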
@@ -176,9 +176,10 @@ void NnetChainTrainer::TrainInternalBackstitch(const NnetChainExample &eg, } // Updates the parameters of nnet - UpdateNnetWithMaxChange(*delta_nnet_, - nnet_config.max_param_change, max_change_scale, scale_adding, nnet_, - &num_max_change_per_component_applied_, &num_max_change_global_applied_); + UpdateNnetWithMaxChange( + *delta_nnet_, nnet_config.max_param_change, + max_change_scale, scale_adding, nnet_, + &max_change_stats_); if (is_backstitch_step1) { // The following will only do something if we have a LinearComponent or @@ -276,41 +277,10 @@ bool NnetChainTrainer::PrintTotalStats() const { const ObjectiveFunctionInfo &info = iter->second; ans = info.PrintTotalStats(name) || ans; } - PrintMaxChangeStats(); + max_change_stats_.Print(*nnet_); return ans; } -void NnetChainTrainer::PrintMaxChangeStats() const { - KALDI_ASSERT(delta_nnet_ != NULL); - const NnetTrainerOptions &nnet_config = opts_.nnet_config; - int32 i = 0; - for (int32 c = 0; c < delta_nnet_->NumComponents(); c++) { - Component *comp = delta_nnet_->GetComponent(c); - if (comp->Properties() & kUpdatableComponent) { - UpdatableComponent *uc = dynamic_cast(comp); - if (uc == NULL) - KALDI_ERR << "Updatable component does not inherit from class " - << "UpdatableComponent; change this code."; - if (num_max_change_per_component_applied_[i] > 0) - KALDI_LOG << "For " << delta_nnet_->GetComponentName(c) - << ", per-component max-change was enforced " - << (100.0 * num_max_change_per_component_applied_[i]) / - (num_minibatches_processed_ * - (nnet_config.backstitch_training_scale == 0.0 ? 1.0 : - 1.0 + 1.0 / nnet_config.backstitch_training_interval)) - << " % of the time."; - i++; - } - } - if (num_max_change_global_applied_ > 0) - KALDI_LOG << "The global max-change was enforced " - << (100.0 * num_max_change_global_applied_) / - (num_minibatches_processed_ * - (nnet_config.backstitch_training_scale == 0.0 ? 1.0 : - 1.0 + 1.0 / nnet_config.backstitch_training_interval)) - << " % of the time."; -} - NnetChainTrainer::~NnetChainTrainer() { if (opts_.nnet_config.write_cache != "") { Output ko(opts_.nnet_config.write_cache, opts_.nnet_config.binary_write_cache); diff --git a/src/nnet3/nnet-chain-training.h b/src/nnet3/nnet-chain-training.h index 5bf6a3f6fce..bc5143491ac 100644 --- a/src/nnet3/nnet-chain-training.h +++ b/src/nnet3/nnet-chain-training.h @@ -64,10 +64,6 @@ class NnetChainTrainer { // Prints out the final stats, and return true if there was a nonzero count. bool PrintTotalStats() const; - // Prints out the max-change stats (if nonzero): the percentage of time that - // per-component max-change and global max-change were enforced. - void PrintMaxChangeStats() const; - ~NnetChainTrainer(); private: // The internal function for doing one step of conventional SGD training. @@ -88,11 +84,8 @@ class NnetChainTrainer { chain::DenominatorGraph den_graph_; Nnet *nnet_; - Nnet *delta_nnet_; // Only used if momentum != 0.0 or max-param-change != - // 0.0. nnet representing accumulated parameter-change - // (we'd call this gradient_nnet_, but due to - // natural-gradient update, it's better to consider it as - // a delta-parameter nnet. + Nnet *delta_nnet_; // stores the change to the parameters on each training + // iteration. CachingOptimizingCompiler compiler_; // This code supports multiple output layers, even though in the @@ -101,8 +94,7 @@ class NnetChainTrainer { int32 num_minibatches_processed_; // stats for max-change. 
- std::vector num_max_change_per_component_applied_; - int32 num_max_change_global_applied_; + MaxChangeStats max_change_stats_; unordered_map objf_info_; diff --git a/src/nnet3/nnet-parse-test.cc b/src/nnet3/nnet-parse-test.cc index babdbbdcb0e..5ae4917dba6 100644 --- a/src/nnet3/nnet-parse-test.cc +++ b/src/nnet3/nnet-parse-test.cc @@ -23,193 +23,6 @@ namespace kaldi { namespace nnet3 { -void UnitTestConfigLineParse() { - std::string str; - { - ConfigLine cfl; - str = "a-b xx=yyy foo=bar baz=123 ba=1:2"; - bool status = cfl.ParseLine(str); - KALDI_ASSERT(status && cfl.FirstToken() == "a-b"); - - KALDI_ASSERT(cfl.HasUnusedValues()); - std::string str_value; - KALDI_ASSERT(cfl.GetValue("xx", &str_value)); - KALDI_ASSERT(str_value == "yyy"); - KALDI_ASSERT(cfl.HasUnusedValues()); - KALDI_ASSERT(cfl.GetValue("foo", &str_value)); - KALDI_ASSERT(str_value == "bar"); - KALDI_ASSERT(cfl.HasUnusedValues()); - KALDI_ASSERT(!cfl.GetValue("xy", &str_value)); - KALDI_ASSERT(cfl.GetValue("baz", &str_value)); - KALDI_ASSERT(str_value == "123"); - - std::vector int_values; - KALDI_ASSERT(!cfl.GetValue("xx", &int_values)); - KALDI_ASSERT(cfl.GetValue("baz", &int_values)); - KALDI_ASSERT(cfl.HasUnusedValues()); - KALDI_ASSERT(int_values.size() == 1 && int_values[0] == 123); - KALDI_ASSERT(cfl.GetValue("ba", &int_values)); - KALDI_ASSERT(int_values.size() == 2 && int_values[0] == 1 && int_values[1] == 2); - KALDI_ASSERT(!cfl.HasUnusedValues()); - } - - { - ConfigLine cfl; - str = "a-b baz=x y z pp = qq ab =cd ac= bd"; - KALDI_ASSERT(!cfl.ParseLine(str)); - } - { - ConfigLine cfl; - str = "a-b baz=x y z pp = qq ab=cd ac=bd"; - KALDI_ASSERT(!cfl.ParseLine(str)); - } - { - ConfigLine cfl; - str = "foo-bar"; - KALDI_ASSERT(cfl.ParseLine(str)); - } - { - ConfigLine cfl; - str = "foo-bar a=b c d f=g"; - std::string value; - KALDI_ASSERT(cfl.ParseLine(str) && cfl.FirstToken() == "foo-bar" && - cfl.GetValue("a", &value) && value == "b c d" && - cfl.GetValue("f", &value) && value == "g" && - !cfl.HasUnusedValues()); - } - { - ConfigLine cfl; - str = "zzz a=b baz"; - KALDI_ASSERT(cfl.ParseLine(str) && cfl.FirstToken() == "zzz" && - cfl.UnusedValues() == "a=b baz"); - } - { - ConfigLine cfl; - str = "xxx a=b baz "; - KALDI_ASSERT(cfl.ParseLine(str) && cfl.UnusedValues() == "a=b baz"); - } - { - ConfigLine cfl; - str = "xxx a=b =c"; - KALDI_ASSERT(!cfl.ParseLine(str)); - } - { - ConfigLine cfl; - str = "xxx baz='x y z' pp=qq ab=cd ac=bd"; - KALDI_ASSERT(cfl.ParseLine(str) && cfl.FirstToken() == "xxx"); - std::string str_value; - KALDI_ASSERT(cfl.GetValue("baz", &str_value)); - KALDI_ASSERT(str_value == "x y z"); - KALDI_ASSERT(cfl.GetValue("pp", &str_value)); - KALDI_ASSERT(str_value == "qq"); - KALDI_ASSERT(cfl.UnusedValues() == "ab=cd ac=bd"); - KALDI_ASSERT(cfl.GetValue("ab", &str_value)); - KALDI_ASSERT(str_value == "cd"); - KALDI_ASSERT(cfl.UnusedValues() == "ac=bd"); - KALDI_ASSERT(cfl.HasUnusedValues()); - KALDI_ASSERT(cfl.GetValue("ac", &str_value)); - KALDI_ASSERT(str_value == "bd"); - KALDI_ASSERT(!cfl.HasUnusedValues()); - } - - { - ConfigLine cfl; - str = "x baz= pp = qq flag=t "; - KALDI_ASSERT(!cfl.ParseLine(str)); - } - { - ConfigLine cfl; - str = " x baz= pp=qq flag=t "; - KALDI_ASSERT(cfl.ParseLine(str) && cfl.FirstToken() == "x"); - - std::string str_value; - KALDI_ASSERT(cfl.GetValue("baz", &str_value)); - KALDI_ASSERT(str_value == ""); - KALDI_ASSERT(cfl.GetValue("pp", &str_value)); - KALDI_ASSERT(str_value == "qq"); - KALDI_ASSERT(cfl.HasUnusedValues()); - KALDI_ASSERT(cfl.GetValue("flag", 
&str_value)); - KALDI_ASSERT(str_value == "t"); - KALDI_ASSERT(!cfl.HasUnusedValues()); - - bool bool_value = false; - KALDI_ASSERT(cfl.GetValue("flag", &bool_value)); - KALDI_ASSERT(bool_value); - } - - { - ConfigLine cfl; - str = "xx _baz=a -pp=qq"; - KALDI_ASSERT(!cfl.ParseLine(str)); - } - { - ConfigLine cfl; - str = "xx 0baz=a pp=qq"; - KALDI_ASSERT(!cfl.ParseLine(str)); - } - { - ConfigLine cfl; - str = "xx -baz=a pp=qq"; - KALDI_ASSERT(!cfl.ParseLine(str)); - } - { - ConfigLine cfl; - str = "xx _baz'=a pp=qq"; - KALDI_ASSERT(!cfl.ParseLine(str)); - } - { - ConfigLine cfl; - str = " baz=g"; - KALDI_ASSERT(cfl.ParseLine(str) && cfl.FirstToken() == ""); - bool flag; - KALDI_ASSERT(!cfl.GetValue("baz", &flag)); - } - { - ConfigLine cfl; - str = "xx _baz1=a pp=qq"; - KALDI_ASSERT(cfl.ParseLine(str)); - - std::string str_value; - KALDI_ASSERT(cfl.GetValue("_baz1", &str_value)); - } -} - -void UnitTestReadConfig() { - std::string str = "a-b alpha=aa beta=\"b b\"# String test\n" - "a-b beta2='b c' beta3=bd # \n" - "a-b gamma=1:2:3:4 # Int Vector test\n" - " a-b de1ta=f # Bool + Integer in key Comment test delta=t \n" - "a-b _epsilon=-1 # Int Vector test _epsilon=1 \n" - "a-b zet-_a=0.15 theta=1.1# Float, -, _ test\n" - "a-b quoted='a b c' # quoted string\n" - "a-b quoted2=\"d e 'a b=c' f\" # string quoted with double quotes"; - - std::istringstream is(str); - std::vector lines; - ReadConfigLines(is, &lines); - KALDI_ASSERT(lines.size() == 8); - - ConfigLine cfl; - for (size_t i = 0; i < lines.size(); i++) { - KALDI_ASSERT(cfl.ParseLine(lines[i]) && cfl.FirstToken() == "a-b"); - if (i == 1) { - KALDI_ASSERT(cfl.GetValue("beta2", &str) && str == "b c"); - } - if (i == 4) { - KALDI_ASSERT(cfl.GetValue("_epsilon", &str) && str == "-1"); - } - if (i == 5) { - BaseFloat float_val = 0; - KALDI_ASSERT(cfl.GetValue("zet-_a", &float_val) && ApproxEqual(float_val, 0.15)); - } - if (i == 6) { - KALDI_ASSERT(cfl.GetValue("quoted", &str) && str == "a b c"); - } - if (i == 7) { - KALDI_ASSERT(cfl.GetValue("quoted2", &str) && str == "d e 'a b=c' f"); - } - } -} void UnitTestDescriptorTokenize() { std::vector lines; @@ -281,8 +94,6 @@ int main() { using namespace kaldi; using namespace kaldi::nnet3; - UnitTestConfigLineParse(); - UnitTestReadConfig(); UnitTestDescriptorTokenize(); UnitTestSummarizeVector(); UnitTestNameMatchesPattern(); diff --git a/src/nnet3/nnet-parse.cc b/src/nnet3/nnet-parse.cc index a51bba21484..17dec23e7c1 100644 --- a/src/nnet3/nnet-parse.cc +++ b/src/nnet3/nnet-parse.cc @@ -27,353 +27,6 @@ namespace kaldi { namespace nnet3 { - -bool ConfigLine::ParseLine(const std::string &line) { - data_.clear(); - whole_line_ = line; - if (line.size() == 0) return false; // Empty line - size_t pos = 0, size = line.size(); - while (isspace(line[pos]) && pos < size) pos++; - if (pos == size) - return false; // whitespace-only line - size_t first_token_start_pos = pos; - // first get first_token_. - while (!isspace(line[pos]) && pos < size) { - if (line[pos] == '=') { - // If the first block of non-whitespace looks like "foo-bar=...", - // then we ignore it: there is no initial token, and FirstToken() - // is empty. - pos = first_token_start_pos; - break; - } - pos++; - } - first_token_ = std::string(line, first_token_start_pos, pos - first_token_start_pos); - // first_token_ is expected to be either empty or something like - // "component-node", which actually is a slightly more restrictive set of - // strings than IsValidName() checks for this is a convenient way to check it. 
- if (!first_token_.empty() && !IsValidName(first_token_)) - return false; - - while (pos < size) { - if (isspace(line[pos])) { - pos++; - continue; - } - - // OK, at this point we know that we are pointing at nonspace. - size_t next_equals_sign = line.find_first_of("=", pos); - if (next_equals_sign == pos || next_equals_sign == std::string::npos) { - // we're looking for something like 'key=value'. If there is no equals sign, - // or it's not preceded by something, it's a parsing failure. - return false; - } - std::string key(line, pos, next_equals_sign - pos); - if (!IsValidName(key)) return false; - - // handle any quotes. we support key='blah blah' or key="foo bar". - // no escaping is supported. - if (line[next_equals_sign+1] == '\'' || line[next_equals_sign+1] == '"') { - char my_quote = line[next_equals_sign+1]; - size_t next_quote = line.find_first_of(my_quote, next_equals_sign + 2); - if (next_quote == std::string::npos) { // no matching quote was found. - KALDI_WARN << "No matching quote for " << my_quote << " in config line '" - << line << "'"; - return false; - } else { - std::string value(line, next_equals_sign + 2, - next_quote - next_equals_sign - 2); - data_.insert(std::make_pair(key, std::make_pair(value, false))); - pos = next_quote + 1; - continue; - } - } else { - // we want to be able to parse something like "... input=Offset(a, -1) foo=bar": - // in general, config values with spaces in them, even without quoting. - - size_t next_next_equals_sign = line.find_first_of("=", next_equals_sign + 1), - terminating_space = size; - - if (next_next_equals_sign != std::string::npos) { // found a later equals sign. - size_t preceding_space = line.find_last_of(" \t", next_next_equals_sign); - if (preceding_space != std::string::npos && - preceding_space > next_equals_sign) - terminating_space = preceding_space; - } - while (isspace(line[terminating_space - 1]) && terminating_space > 0) - terminating_space--; - - std::string value(line, next_equals_sign + 1, - terminating_space - (next_equals_sign + 1)); - data_.insert(std::make_pair(key, std::make_pair(value, false))); - pos = terminating_space; - } - } - return true; -} - -bool ConfigLine::GetValue(const std::string &key, std::string *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - *value = (it->second).first; - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, BaseFloat *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if (!ConvertStringToReal((it->second).first, value)) - return false; - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, int32 *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if (!ConvertStringToInteger((it->second).first, value)) - return false; - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, std::vector *value) { - KALDI_ASSERT(value != NULL); - value->clear(); - std::map >::iterator it = data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if (!SplitStringToIntegers((it->second).first, ":,", true, value)) { - // KALDI_WARN << "Bad option " << (it->second).first; - return 
false; - } - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, bool *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if ((it->second).first.size() == 0) return false; - switch (((it->second).first)[0]) { - case 'F': - case 'f': - *value = false; - break; - case 'T': - case 't': - *value = true; - break; - default: - return false; - } - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::HasUnusedValues() const { - std::map >::const_iterator it = data_.begin(); - for (; it != data_.end(); ++it) { - if (!(it->second).second) return true; - } - return false; -} - -std::string ConfigLine::UnusedValues() const { - std::string unused_str; - std::map >::const_iterator it = data_.begin(); - for (; it != data_.end(); ++it) { - if (!(it->second).second) { - if (unused_str == "") - unused_str = it->first + "=" + (it->second).first; - else - unused_str += " " + it->first + "=" + (it->second).first; - } - } - return unused_str; -} - -// This is like ExpectToken but for two tokens, and it -// will either accept token1 and then token2, or just token2. -// This is useful in Read functions where the first token -// may already have been consumed. -void ExpectOneOrTwoTokens(std::istream &is, bool binary, - const std::string &token1, - const std::string &token2) { - KALDI_ASSERT(token1 != token2); - std::string temp; - ReadToken(is, binary, &temp); - if (temp == token1) { - ExpectToken(is, binary, token2); - } else { - if (temp != token2) { - KALDI_ERR << "Expecting token " << token1 << " or " << token2 - << " but got " << temp; - } - } -} - -// static -bool ParseFromString(const std::string &name, std::string *string, - int32 *param) { - std::vector split_string; - SplitStringToVector(*string, " \t", true, - &split_string); - std::string name_equals = name + "="; // the name and then the equals sign. - size_t len = name_equals.length(); - - for (size_t i = 0; i < split_string.size(); i++) { - if (split_string[i].compare(0, len, name_equals) == 0) { - if (!ConvertStringToInteger(split_string[i].substr(len), param)) - KALDI_ERR << "Bad option " << split_string[i]; - *string = ""; - // Set "string" to all the pieces but the one we used. - for (size_t j = 0; j < split_string.size(); j++) { - if (j != i) { - if (!string->empty()) *string += " "; - *string += split_string[j]; - } - } - return true; - } - } - return false; -} - -bool ParseFromString(const std::string &name, std::string *string, - bool *param) { - std::vector split_string; - SplitStringToVector(*string, " \t", true, - &split_string); - std::string name_equals = name + "="; // the name and then the equals sign. - size_t len = name_equals.length(); - - for (size_t i = 0; i < split_string.size(); i++) { - if (split_string[i].compare(0, len, name_equals) == 0) { - std::string b = split_string[i].substr(len); - if (b.empty()) - KALDI_ERR << "Bad option " << split_string[i]; - if (b[0] == 'f' || b[0] == 'F') *param = false; - else if (b[0] == 't' || b[0] == 'T') *param = true; - else - KALDI_ERR << "Bad option " << split_string[i]; - *string = ""; - // Set "string" to all the pieces but the one we used. 
- for (size_t j = 0; j < split_string.size(); j++) { - if (j != i) { - if (!string->empty()) *string += " "; - *string += split_string[j]; - } - } - return true; - } - } - return false; -} - -bool ParseFromString(const std::string &name, std::string *string, - BaseFloat *param) { - std::vector split_string; - SplitStringToVector(*string, " \t", true, - &split_string); - std::string name_equals = name + "="; // the name and then the equals sign. - size_t len = name_equals.length(); - - for (size_t i = 0; i < split_string.size(); i++) { - if (split_string[i].compare(0, len, name_equals) == 0) { - if (!ConvertStringToReal(split_string[i].substr(len), param)) - KALDI_ERR << "Bad option " << split_string[i]; - *string = ""; - // Set "string" to all the pieces but the one we used. - for (size_t j = 0; j < split_string.size(); j++) { - if (j != i) { - if (!string->empty()) *string += " "; - *string += split_string[j]; - } - } - return true; - } - } - return false; -} - -bool ParseFromString(const std::string &name, std::string *string, - std::string *param) { - std::vector split_string; - SplitStringToVector(*string, " \t", true, - &split_string); - std::string name_equals = name + "="; // the name and then the equals sign. - size_t len = name_equals.length(); - - for (size_t i = 0; i < split_string.size(); i++) { - if (split_string[i].compare(0, len, name_equals) == 0) { - *param = split_string[i].substr(len); - - // Set "string" to all the pieces but the one we used. - *string = ""; - for (size_t j = 0; j < split_string.size(); j++) { - if (j != i) { - if (!string->empty()) *string += " "; - *string += split_string[j]; - } - } - return true; - } - } - return false; -} - -bool ParseFromString(const std::string &name, std::string *string, - std::vector *param) { - std::vector split_string; - SplitStringToVector(*string, " \t", true, - &split_string); - std::string name_equals = name + "="; // the name and then the equals sign. - size_t len = name_equals.length(); - - for (size_t i = 0; i < split_string.size(); i++) { - if (split_string[i].compare(0, len, name_equals) == 0) { - if (!SplitStringToIntegers(split_string[i].substr(len), ":,", - false, param)) - KALDI_ERR << "Bad option " << split_string[i]; - *string = ""; - // Set "string" to all the pieces but the one we used. 
- for (size_t j = 0; j < split_string.size(); j++) { - if (j != i) { - if (!string->empty()) *string += " "; - *string += split_string[j]; - } - } - return true; - } - } - return false; -} - bool DescriptorTokenize(const std::string &input, std::vector *tokens) { KALDI_ASSERT(tokens != NULL); @@ -422,32 +75,6 @@ bool DescriptorTokenize(const std::string &input, return true; } -bool IsValidName(const std::string &name) { - if (name.size() == 0) return false; - for (size_t i = 0; i < name.size(); i++) { - if (i == 0 && !isalpha(name[i]) && name[i] != '_') - return false; - if (!isalnum(name[i]) && name[i] != '_' && name[i] != '-' && name[i] != '.') - return false; - } - return true; -} - -void ReadConfigLines(std::istream &is, - std::vector *lines) { - KALDI_ASSERT(lines != NULL); - std::string line; - while (std::getline(is, line)) { - if (line.size() == 0) continue; - size_t start = line.find_first_not_of(" \t"); - size_t end = line.find_first_of('#'); - if (start == std::string::npos || start == end) continue; - end = line.find_last_not_of(" \t", end - 1); - KALDI_ASSERT(end >= start); - lines->push_back(line.substr(start, end - start + 1)); - } -} - std::string ErrorContext(std::istream &is) { if (!is.good()) return "end of line"; char buf[21]; diff --git a/src/nnet3/nnet-parse.h b/src/nnet3/nnet-parse.h index a073a54f7e0..0fc19d51f6c 100644 --- a/src/nnet3/nnet-parse.h +++ b/src/nnet3/nnet-parse.h @@ -26,103 +26,6 @@ namespace kaldi { namespace nnet3 { -/** - This class is responsible for parsing input like - hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' baz="a b c d='a b' e" - and giving you access to the fields, in this case - - FirstToken() == "hi-there", and key->value pairs: - - xx->yyy, a->"b c", empty->"", f-oo->"Append(bar, sss)", ba_z->"123", - bing->"a b c", baz->"a b c d='a b' e" - - The first token is optional, if the line started with a key-value pair then - FirstValue() will be empty. - - Note: it can parse value fields with space inside them only if they are free of the '=' - character. If values are going to contain the '=' character, you need to quote them - with either single or double quotes. - - Key values may contain -_a-zA-Z0-9, but must begin with a-zA-Z_. - */ -class ConfigLine { - public: - // Tries to parse the line as a config-file line. Returns false - // if it could not for some reason, e.g. parsing failure. In most cases - // prints no warnings; the user should do this. Does not expect comments. - bool ParseLine(const std::string &line); - - // the GetValue functions are overloaded for various types. They return true - // if the key exists with value that can be converted to that type, and false - // otherwise. They also mark the key-value pair as having been read. It is - // not an error to read values twice. - bool GetValue(const std::string &key, std::string *value); - bool GetValue(const std::string &key, BaseFloat *value); - bool GetValue(const std::string &key, int32 *value); - // Values may be separated by ":" or by ",". - bool GetValue(const std::string &key, std::vector *value); - bool GetValue(const std::string &key, bool *value); - - bool HasUnusedValues() const; - /// returns e.g. foo=bar xxx=yyy if foo and xxx were not consumed by one - /// of the GetValue() functions. - std::string UnusedValues() const; - - const std::string &FirstToken() const { return first_token_; } - - const std::string WholeLine() { return whole_line_; } - // use default assignment operator and copy constructor. 
- private: - std::string whole_line_; - // the first token of the line, e.g. if line is - // foo-bar baz=bing - // then first_token_ would be "foo-bar". - std::string first_token_; - - // data_ maps from key to (value, is-this-value-consumed?). - std::map > data_; - -}; - -// Note: the ParseFromString functions are to be removed after we switch over to -// using the ConfigLine mechanism. - - -/// \file nnet-parse.h -/// This header contains a few parsing-related functions that are used -/// while reading parsing neural network files and config files. - -/// Function used in Init routines. Suppose name=="foo", if "string" has a -/// field like foo=12, this function will set "param" to 12 and remove that -/// element from "string". It returns true if the parameter was read. -bool ParseFromString(const std::string &name, std::string *string, - int32 *param); - -/// This version of ParseFromString is for parameters of type BaseFloat. -bool ParseFromString(const std::string &name, std::string *string, - BaseFloat *param); - -/// This version of ParseFromString is for parameters of type bool, which can -/// appear as any string beginning with f, F, t or T. -bool ParseFromString(const std::string &name, std::string *string, - bool *param); - -/// This version of ParseFromString is for parsing strings. (these -/// should not contain space). -bool ParseFromString(const std::string &name, std::string *string, - std::string *param); - -/// This version of ParseFromString handles colon-separated or comma-separated -/// lists of integers. -bool ParseFromString(const std::string &name, std::string *string, - std::vector *param); - -/// This function is like ExpectToken but for two tokens, and it will either -/// accept token1 and then token2, or just token2. This is useful in Read -/// functions where the first token may already have been consumed. -void ExpectOneOrTwoTokens(std::istream &is, bool binary, - const std::string &token1, - const std::string &token2); /** This function tokenizes input when parsing Descriptor configuration @@ -142,32 +45,6 @@ void ExpectOneOrTwoTokens(std::istream &is, bool binary, bool DescriptorTokenize(const std::string &input, std::vector *tokens); -/// Returns true if 'name' would be a valid name for a component or node in a -/// Nnet. This is a nonempty string beginning with A-Za-z_, and containing only -/// '-', '_', '.', A-Z, a-z, or 0-9. -bool IsValidName(const std::string &name); - - -/** - This function reads in a config file and *appends* its contents to a vector of - lines; it is responsible for removing comments (anything after '#') and - stripping out any lines that contain only whitespace after comment removal. - */ -void ReadConfigLines(std::istream &is, - std::vector *lines); - - -/** - This function converts config-lines from a simple sequence of strings - as output by ReadConfigLines(), into a sequence of first-tokens and - name-value pairs. The general format is: - "command-type bar=baz xx=yyy" - etc., although there are subtleties as to what exactly is allowed, see - documentation for class ConfigLine for details. - This function will die if there was a parsing failure. - */ -void ParseConfigLines(const std::vector &lines, - std::vector *config_lines); /* Returns true if name 'name' matches pattern 'pattern'. 
The pattern diff --git a/src/nnet3/nnet-training.cc b/src/nnet3/nnet-training.cc index 820644470c7..b4563c7a2c3 100644 --- a/src/nnet3/nnet-training.cc +++ b/src/nnet3/nnet-training.cc @@ -30,6 +30,7 @@ NnetTrainer::NnetTrainer(const NnetTrainerOptions &config, nnet_(nnet), compiler_(*nnet, config_.optimize_config, config_.compiler_config), num_minibatches_processed_(0), + max_change_stats_(*nnet), srand_seed_(RandInt(0, 100000)) { if (config.zero_component_stats) ZeroComponentStats(nnet); @@ -38,9 +39,6 @@ NnetTrainer::NnetTrainer(const NnetTrainerOptions &config, config.backstitch_training_interval > 0); delta_nnet_ = nnet_->Copy(); ScaleNnet(0.0, delta_nnet_); - const int32 num_updatable = NumUpdatableComponents(*delta_nnet_); - num_max_change_per_component_applied_.resize(num_updatable, 0); - num_max_change_global_applied_ = 0; if (config_.read_cache != "") { bool binary; @@ -111,9 +109,9 @@ void NnetTrainer::TrainInternal(const NnetExample &eg, delta_nnet_); // Update the parameters of nnet - bool success = UpdateNnetWithMaxChange(*delta_nnet_, config_.max_param_change, - 1.0, 1.0 - config_.momentum, nnet_, - &num_max_change_per_component_applied_, &num_max_change_global_applied_); + bool success = UpdateNnetWithMaxChange( + *delta_nnet_, config_.max_param_change, + 1.0, 1.0 - config_.momentum, nnet_, &max_change_stats_); // Scale down the batchnorm stats (keeps them fresh... this affects what // happens when we use the model with batchnorm test-mode set). @@ -167,9 +165,10 @@ void NnetTrainer::TrainInternalBackstitch(const NnetExample &eg, } // Updates the parameters of nnet - UpdateNnetWithMaxChange(*delta_nnet_, config_.max_param_change, + UpdateNnetWithMaxChange( + *delta_nnet_, config_.max_param_change, max_change_scale, scale_adding, nnet_, - &num_max_change_per_component_applied_, &num_max_change_global_applied_); + &max_change_stats_); if (is_backstitch_step1) { // The following will only do something if we have a LinearComponent or @@ -236,40 +235,10 @@ bool NnetTrainer::PrintTotalStats() const { bool ok = info.PrintTotalStats(name); ans = ans || ok; } - PrintMaxChangeStats(); + max_change_stats_.Print(*nnet_); return ans; } -void NnetTrainer::PrintMaxChangeStats() const { - KALDI_ASSERT(delta_nnet_ != NULL); - int32 i = 0; - for (int32 c = 0; c < delta_nnet_->NumComponents(); c++) { - Component *comp = delta_nnet_->GetComponent(c); - if (comp->Properties() & kUpdatableComponent) { - UpdatableComponent *uc = dynamic_cast(comp); - if (uc == NULL) - KALDI_ERR << "Updatable component does not inherit from class " - << "UpdatableComponent; change this code."; - if (num_max_change_per_component_applied_[i] > 0) - KALDI_LOG << "For " << delta_nnet_->GetComponentName(c) - << ", per-component max-change was enforced " - << (100.0 * num_max_change_per_component_applied_[i]) / - (num_minibatches_processed_ * - (config_.backstitch_training_scale == 0.0 ? 1.0 : - 1.0 + 1.0 / config_.backstitch_training_interval)) - << " % of the time."; - i++; - } - } - if (num_max_change_global_applied_ > 0) - KALDI_LOG << "The global max-change was enforced " - << (100.0 * num_max_change_global_applied_) / - (num_minibatches_processed_ * - (config_.backstitch_training_scale == 0.0 ? 
1.0 : - 1.0 + 1.0 / config_.backstitch_training_interval)) - << " % of the time."; -} - void ObjectiveFunctionInfo::UpdateStats( const std::string &output_name, int32 minibatches_per_phase, diff --git a/src/nnet3/nnet-training.h b/src/nnet3/nnet-training.h index fffc621930a..64ec7abc58e 100644 --- a/src/nnet3/nnet-training.h +++ b/src/nnet3/nnet-training.h @@ -26,6 +26,7 @@ #include "nnet3/nnet-compute.h" #include "nnet3/nnet-optimize.h" #include "nnet3/nnet-example-utils.h" +#include "nnet3/nnet-utils.h" namespace kaldi { namespace nnet3 { @@ -187,10 +188,6 @@ class NnetTrainer { // Prints out the final stats, and return true if there was a nonzero count. bool PrintTotalStats() const; - // Prints out the max-change stats (if nonzero): the percentage of time that - // per-component max-change and global max-change were enforced. - void PrintMaxChangeStats() const; - ~NnetTrainer(); private: // The internal function for doing one step of conventional SGD training. @@ -220,8 +217,7 @@ class NnetTrainer { int32 num_minibatches_processed_; // stats for max-change. - std::vector num_max_change_per_component_applied_; - int32 num_max_change_global_applied_; + MaxChangeStats max_change_stats_; unordered_map objf_info_; diff --git a/src/nnet3/nnet-utils.cc b/src/nnet3/nnet-utils.cc index e020f8fc6a7..541d2735529 100644 --- a/src/nnet3/nnet-utils.cc +++ b/src/nnet3/nnet-utils.cc @@ -1655,7 +1655,6 @@ class ModelCollapser { component_index2); } - /** Tries to produce a component that's equivalent to running the component 'component_index2' with input given by 'component_index1'. This handles @@ -2173,5 +2172,47 @@ void ApplyL2Regularization(const Nnet &nnet, } +bool UpdateNnetWithMaxChange(const Nnet &delta_nnet, + BaseFloat max_param_change, + BaseFloat max_change_scale, + BaseFloat scale, Nnet *nnet, + MaxChangeStats *stats) { + bool ans = UpdateNnetWithMaxChange( + delta_nnet, max_param_change, max_change_scale, + scale, nnet, + &(stats->num_max_change_per_component_applied), + &(stats->num_max_change_global_applied)); + stats->num_minibatches_processed++; + return ans; +} + + +void MaxChangeStats::Print(const Nnet &nnet) const { + int32 i = 0; + for (int32 c = 0; c < nnet.NumComponents(); c++) { + const Component *comp = nnet.GetComponent(c); + if (comp->Properties() & kUpdatableComponent) { + const UpdatableComponent *uc = dynamic_cast( + comp); + if (uc == NULL) + KALDI_ERR << "Updatable component does not inherit from class " + << "UpdatableComponent; change this code."; + if (num_max_change_per_component_applied[i] > 0) + KALDI_LOG << "For " << nnet.GetComponentName(c) + << ", per-component max-change was enforced " + << ((100.0 * num_max_change_per_component_applied[i]) / + num_minibatches_processed) + << " \% of the time."; + i++; + } + } + if (num_max_change_global_applied > 0) + KALDI_LOG << "The global max-change was enforced " + << ((100.0 * num_max_change_global_applied) / + num_minibatches_processed) + << " \% of the time."; +} + + } // namespace nnet3 } // namespace kaldi diff --git a/src/nnet3/nnet-utils.h b/src/nnet3/nnet-utils.h index 787bd228a38..60a18f15d84 100644 --- a/src/nnet3/nnet-utils.h +++ b/src/nnet3/nnet-utils.h @@ -377,6 +377,17 @@ bool UpdateNnetWithMaxChange(const Nnet &delta_nnet, num_max_change_per_component_applied, int32 *num_max_change_global_applied); +struct MaxChangeStats; + +// This overloaded version of UpdateNnetWithMaxChange() is a convenience +// wrapper for when you have a MaxChangeStats object to keep track +// of how many times the max-change was 
applied. See documentation above. +bool UpdateNnetWithMaxChange(const Nnet &delta_nnet, + BaseFloat max_param_change, + BaseFloat max_change_scale, + BaseFloat scale, Nnet *nnet, + MaxChangeStats *stats); + /** This function is used as part of the regular training workflow, prior to @@ -513,6 +524,24 @@ int32 GetNumNvalues(const std::vector &io_vec, bool exhaustive); +struct MaxChangeStats { + int32 num_max_change_global_applied; + int32 num_minibatches_processed; + std::vector num_max_change_per_component_applied; + + MaxChangeStats(const Nnet &nnet): + num_max_change_global_applied(0), + num_minibatches_processed(0), + num_max_change_per_component_applied(NumUpdatableComponents(nnet), 0) { } + + // Prints the max-change stats. Usually will be called at the end + // of the program. The nnet is only needed for structural information, + // to work out the component names. + void Print(const Nnet &nnet) const; +}; + + + } // namespace nnet3 } // namespace kaldi diff --git a/src/util/text-utils-test.cc b/src/util/text-utils-test.cc index 5bfe4cb24d0..3b58f4f1dd1 100644 --- a/src/util/text-utils-test.cc +++ b/src/util/text-utils-test.cc @@ -2,6 +2,7 @@ // Copyright 2009-2011 Microsoft Corporation // 2017 Johns Hopkins University (author: Daniel Povey) +// 2015 Vimal Manohar (Johns Hopkins University) // See ../../COPYING for clarification regarding multiple authors // @@ -324,6 +325,193 @@ void TestStringsApproxEqual() { KALDI_ASSERT(!StringsApproxEqual("x 1.0 y", "x 1.0001 y", 4)); } +void UnitTestConfigLineParse() { + std::string str; + { + ConfigLine cfl; + str = "a-b xx=yyy foo=bar baz=123 ba=1:2"; + bool status = cfl.ParseLine(str); + KALDI_ASSERT(status && cfl.FirstToken() == "a-b"); + + KALDI_ASSERT(cfl.HasUnusedValues()); + std::string str_value; + KALDI_ASSERT(cfl.GetValue("xx", &str_value)); + KALDI_ASSERT(str_value == "yyy"); + KALDI_ASSERT(cfl.HasUnusedValues()); + KALDI_ASSERT(cfl.GetValue("foo", &str_value)); + KALDI_ASSERT(str_value == "bar"); + KALDI_ASSERT(cfl.HasUnusedValues()); + KALDI_ASSERT(!cfl.GetValue("xy", &str_value)); + KALDI_ASSERT(cfl.GetValue("baz", &str_value)); + KALDI_ASSERT(str_value == "123"); + + std::vector int_values; + KALDI_ASSERT(!cfl.GetValue("xx", &int_values)); + KALDI_ASSERT(cfl.GetValue("baz", &int_values)); + KALDI_ASSERT(cfl.HasUnusedValues()); + KALDI_ASSERT(int_values.size() == 1 && int_values[0] == 123); + KALDI_ASSERT(cfl.GetValue("ba", &int_values)); + KALDI_ASSERT(int_values.size() == 2 && int_values[0] == 1 && int_values[1] == 2); + KALDI_ASSERT(!cfl.HasUnusedValues()); + } + + { + ConfigLine cfl; + str = "a-b baz=x y z pp = qq ab =cd ac= bd"; + KALDI_ASSERT(!cfl.ParseLine(str)); + } + { + ConfigLine cfl; + str = "a-b baz=x y z pp = qq ab=cd ac=bd"; + KALDI_ASSERT(!cfl.ParseLine(str)); + } + { + ConfigLine cfl; + str = "foo-bar"; + KALDI_ASSERT(cfl.ParseLine(str)); + } + { + ConfigLine cfl; + str = "foo-bar a=b c d f=g"; + std::string value; + KALDI_ASSERT(cfl.ParseLine(str) && cfl.FirstToken() == "foo-bar" && + cfl.GetValue("a", &value) && value == "b c d" && + cfl.GetValue("f", &value) && value == "g" && + !cfl.HasUnusedValues()); + } + { + ConfigLine cfl; + str = "zzz a=b baz"; + KALDI_ASSERT(cfl.ParseLine(str) && cfl.FirstToken() == "zzz" && + cfl.UnusedValues() == "a=b baz"); + } + { + ConfigLine cfl; + str = "xxx a=b baz "; + KALDI_ASSERT(cfl.ParseLine(str) && cfl.UnusedValues() == "a=b baz"); + } + { + ConfigLine cfl; + str = "xxx a=b =c"; + KALDI_ASSERT(!cfl.ParseLine(str)); + } + { + ConfigLine cfl; + str = "xxx baz='x y z' pp=qq 
ab=cd ac=bd"; + KALDI_ASSERT(cfl.ParseLine(str) && cfl.FirstToken() == "xxx"); + std::string str_value; + KALDI_ASSERT(cfl.GetValue("baz", &str_value)); + KALDI_ASSERT(str_value == "x y z"); + KALDI_ASSERT(cfl.GetValue("pp", &str_value)); + KALDI_ASSERT(str_value == "qq"); + KALDI_ASSERT(cfl.UnusedValues() == "ab=cd ac=bd"); + KALDI_ASSERT(cfl.GetValue("ab", &str_value)); + KALDI_ASSERT(str_value == "cd"); + KALDI_ASSERT(cfl.UnusedValues() == "ac=bd"); + KALDI_ASSERT(cfl.HasUnusedValues()); + KALDI_ASSERT(cfl.GetValue("ac", &str_value)); + KALDI_ASSERT(str_value == "bd"); + KALDI_ASSERT(!cfl.HasUnusedValues()); + } + + { + ConfigLine cfl; + str = "x baz= pp = qq flag=t "; + KALDI_ASSERT(!cfl.ParseLine(str)); + } + { + ConfigLine cfl; + str = " x baz= pp=qq flag=t "; + KALDI_ASSERT(cfl.ParseLine(str) && cfl.FirstToken() == "x"); + + std::string str_value; + KALDI_ASSERT(cfl.GetValue("baz", &str_value)); + KALDI_ASSERT(str_value == ""); + KALDI_ASSERT(cfl.GetValue("pp", &str_value)); + KALDI_ASSERT(str_value == "qq"); + KALDI_ASSERT(cfl.HasUnusedValues()); + KALDI_ASSERT(cfl.GetValue("flag", &str_value)); + KALDI_ASSERT(str_value == "t"); + KALDI_ASSERT(!cfl.HasUnusedValues()); + + bool bool_value = false; + KALDI_ASSERT(cfl.GetValue("flag", &bool_value)); + KALDI_ASSERT(bool_value); + } + + { + ConfigLine cfl; + str = "xx _baz=a -pp=qq"; + KALDI_ASSERT(!cfl.ParseLine(str)); + } + { + ConfigLine cfl; + str = "xx 0baz=a pp=qq"; + KALDI_ASSERT(!cfl.ParseLine(str)); + } + { + ConfigLine cfl; + str = "xx -baz=a pp=qq"; + KALDI_ASSERT(!cfl.ParseLine(str)); + } + { + ConfigLine cfl; + str = "xx _baz'=a pp=qq"; + KALDI_ASSERT(!cfl.ParseLine(str)); + } + { + ConfigLine cfl; + str = " baz=g"; + KALDI_ASSERT(cfl.ParseLine(str) && cfl.FirstToken() == ""); + bool flag; + KALDI_ASSERT(!cfl.GetValue("baz", &flag)); + } + { + ConfigLine cfl; + str = "xx _baz1=a pp=qq"; + KALDI_ASSERT(cfl.ParseLine(str)); + + std::string str_value; + KALDI_ASSERT(cfl.GetValue("_baz1", &str_value)); + } +} + +void UnitTestReadConfig() { + std::string str = "a-b alpha=aa beta=\"b b\"# String test\n" + "a-b beta2='b c' beta3=bd # \n" + "a-b gamma=1:2:3:4 # Int Vector test\n" + " a-b de1ta=f # Bool + Integer in key Comment test delta=t \n" + "a-b _epsilon=-1 # Int Vector test _epsilon=1 \n" + "a-b zet-_a=0.15 theta=1.1# Float, -, _ test\n" + "a-b quoted='a b c' # quoted string\n" + "a-b quoted2=\"d e 'a b=c' f\" # string quoted with double quotes"; + + std::istringstream is(str); + std::vector lines; + ReadConfigLines(is, &lines); + KALDI_ASSERT(lines.size() == 8); + + ConfigLine cfl; + for (size_t i = 0; i < lines.size(); i++) { + KALDI_ASSERT(cfl.ParseLine(lines[i]) && cfl.FirstToken() == "a-b"); + if (i == 1) { + KALDI_ASSERT(cfl.GetValue("beta2", &str) && str == "b c"); + } + if (i == 4) { + KALDI_ASSERT(cfl.GetValue("_epsilon", &str) && str == "-1"); + } + if (i == 5) { + BaseFloat float_val = 0; + KALDI_ASSERT(cfl.GetValue("zet-_a", &float_val) && ApproxEqual(float_val, 0.15)); + } + if (i == 6) { + KALDI_ASSERT(cfl.GetValue("quoted", &str) && str == "a b c"); + } + if (i == 7) { + KALDI_ASSERT(cfl.GetValue("quoted2", &str) && str == "d e 'a b=c' f"); + } + } +} } // end namespace kaldi @@ -344,5 +532,7 @@ int main() { TestNan(); TestInf(); TestInf(); + UnitTestConfigLineParse(); + UnitTestReadConfig(); std::cout << "Test OK\n"; } diff --git a/src/util/text-utils.cc b/src/util/text-utils.cc index 200e3ad9327..bbf38ecc5cc 100644 --- a/src/util/text-utils.cc +++ b/src/util/text-utils.cc @@ -340,4 +340,252 @@ bool 
StringsApproxEqual(const std::string &a, } +bool ConfigLine::ParseLine(const std::string &line) { + data_.clear(); + whole_line_ = line; + if (line.size() == 0) return false; // Empty line + size_t pos = 0, size = line.size(); + while (isspace(line[pos]) && pos < size) pos++; + if (pos == size) + return false; // whitespace-only line + size_t first_token_start_pos = pos; + // first get first_token_. + while (!isspace(line[pos]) && pos < size) { + if (line[pos] == '=') { + // If the first block of non-whitespace looks like "foo-bar=...", + // then we ignore it: there is no initial token, and FirstToken() + // is empty. + pos = first_token_start_pos; + break; + } + pos++; + } + first_token_ = std::string(line, first_token_start_pos, pos - first_token_start_pos); + // first_token_ is expected to be either empty or something like + // "component-node", which actually is a slightly more restrictive set of + // strings than IsValidName() checks for this is a convenient way to check it. + if (!first_token_.empty() && !IsValidName(first_token_)) + return false; + + while (pos < size) { + if (isspace(line[pos])) { + pos++; + continue; + } + + // OK, at this point we know that we are pointing at nonspace. + size_t next_equals_sign = line.find_first_of("=", pos); + if (next_equals_sign == pos || next_equals_sign == std::string::npos) { + // we're looking for something like 'key=value'. If there is no equals sign, + // or it's not preceded by something, it's a parsing failure. + return false; + } + std::string key(line, pos, next_equals_sign - pos); + if (!IsValidName(key)) return false; + + // handle any quotes. we support key='blah blah' or key="foo bar". + // no escaping is supported. + if (line[next_equals_sign+1] == '\'' || line[next_equals_sign+1] == '"') { + char my_quote = line[next_equals_sign+1]; + size_t next_quote = line.find_first_of(my_quote, next_equals_sign + 2); + if (next_quote == std::string::npos) { // no matching quote was found. + KALDI_WARN << "No matching quote for " << my_quote << " in config line '" + << line << "'"; + return false; + } else { + std::string value(line, next_equals_sign + 2, + next_quote - next_equals_sign - 2); + data_.insert(std::make_pair(key, std::make_pair(value, false))); + pos = next_quote + 1; + continue; + } + } else { + // we want to be able to parse something like "... input=Offset(a, -1) foo=bar": + // in general, config values with spaces in them, even without quoting. + + size_t next_next_equals_sign = line.find_first_of("=", next_equals_sign + 1), + terminating_space = size; + + if (next_next_equals_sign != std::string::npos) { // found a later equals sign. 
+ size_t preceding_space = line.find_last_of(" \t", next_next_equals_sign); + if (preceding_space != std::string::npos && + preceding_space > next_equals_sign) + terminating_space = preceding_space; + } + while (isspace(line[terminating_space - 1]) && terminating_space > 0) + terminating_space--; + + std::string value(line, next_equals_sign + 1, + terminating_space - (next_equals_sign + 1)); + data_.insert(std::make_pair(key, std::make_pair(value, false))); + pos = terminating_space; + } + } + return true; +} + +bool ConfigLine::GetValue(const std::string &key, std::string *value) { + KALDI_ASSERT(value != NULL); + std::map >::iterator it = data_.begin(); + for (; it != data_.end(); ++it) { + if (it->first == key) { + *value = (it->second).first; + (it->second).second = true; + return true; + } + } + return false; +} + +bool ConfigLine::GetValue(const std::string &key, BaseFloat *value) { + KALDI_ASSERT(value != NULL); + std::map >::iterator it = data_.begin(); + for (; it != data_.end(); ++it) { + if (it->first == key) { + if (!ConvertStringToReal((it->second).first, value)) + return false; + (it->second).second = true; + return true; + } + } + return false; +} + +bool ConfigLine::GetValue(const std::string &key, int32 *value) { + KALDI_ASSERT(value != NULL); + std::map >::iterator it = data_.begin(); + for (; it != data_.end(); ++it) { + if (it->first == key) { + if (!ConvertStringToInteger((it->second).first, value)) + return false; + (it->second).second = true; + return true; + } + } + return false; +} + +bool ConfigLine::GetValue(const std::string &key, std::vector *value) { + KALDI_ASSERT(value != NULL); + value->clear(); + std::map >::iterator it = data_.begin(); + for (; it != data_.end(); ++it) { + if (it->first == key) { + if (!SplitStringToIntegers((it->second).first, ":,", true, value)) { + // KALDI_WARN << "Bad option " << (it->second).first; + return false; + } + (it->second).second = true; + return true; + } + } + return false; +} + +bool ConfigLine::GetValue(const std::string &key, bool *value) { + KALDI_ASSERT(value != NULL); + std::map >::iterator it = data_.begin(); + for (; it != data_.end(); ++it) { + if (it->first == key) { + if ((it->second).first.size() == 0) return false; + switch (((it->second).first)[0]) { + case 'F': + case 'f': + *value = false; + break; + case 'T': + case 't': + *value = true; + break; + default: + return false; + } + (it->second).second = true; + return true; + } + } + return false; +} + +bool ConfigLine::HasUnusedValues() const { + std::map >::const_iterator it = data_.begin(); + for (; it != data_.end(); ++it) { + if (!(it->second).second) return true; + } + return false; +} + +std::string ConfigLine::UnusedValues() const { + std::string unused_str; + std::map >::const_iterator it = data_.begin(); + for (; it != data_.end(); ++it) { + if (!(it->second).second) { + if (unused_str == "") + unused_str = it->first + "=" + (it->second).first; + else + unused_str += " " + it->first + "=" + (it->second).first; + } + } + return unused_str; +} + +// This is like ExpectToken but for two tokens, and it +// will either accept token1 and then token2, or just token2. +// This is useful in Read functions where the first token +// may already have been consumed. 
+void ExpectOneOrTwoTokens(std::istream &is, bool binary, + const std::string &token1, + const std::string &token2) { + KALDI_ASSERT(token1 != token2); + std::string temp; + ReadToken(is, binary, &temp); + if (temp == token1) { + ExpectToken(is, binary, token2); + } else { + if (temp != token2) { + KALDI_ERR << "Expecting token " << token1 << " or " << token2 + << " but got " << temp; + } + } +} + + +bool IsValidName(const std::string &name) { + if (name.size() == 0) return false; + for (size_t i = 0; i < name.size(); i++) { + if (i == 0 && !isalpha(name[i]) && name[i] != '_') + return false; + if (!isalnum(name[i]) && name[i] != '_' && name[i] != '-' && name[i] != '.') + return false; + } + return true; +} + +void ReadConfigLines(std::istream &is, + std::vector *lines) { + KALDI_ASSERT(lines != NULL); + std::string line; + while (std::getline(is, line)) { + if (line.size() == 0) continue; + size_t start = line.find_first_not_of(" \t"); + size_t end = line.find_first_of('#'); + if (start == std::string::npos || start == end) continue; + end = line.find_last_not_of(" \t", end - 1); + KALDI_ASSERT(end >= start); + lines->push_back(line.substr(start, end - start + 1)); + } +} + +void ParseConfigLines(const std::vector &lines, + std::vector *config_lines) { + config_lines->resize(lines.size()); + for (size_t i = 0; i < lines.size(); i++) { + bool ret = (*config_lines)[i].ParseLine(lines[i]); + if (!ret) { + KALDI_ERR << "Error parsing config line: " << lines[i]; + } + } +} + + } // end namespace kaldi diff --git a/src/util/text-utils.h b/src/util/text-utils.h index 7bc20957672..02f4bf483fc 100644 --- a/src/util/text-utils.h +++ b/src/util/text-utils.h @@ -183,6 +183,98 @@ bool StringsApproxEqual(const std::string &a, const std::string &b, int32 decimal_places_check = 2); +/** + This class is responsible for parsing input like + hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' baz="a b c d='a b' e" + and giving you access to the fields, in this case + + FirstToken() == "hi-there", and key->value pairs: + + xx->yyy, a->"b c", empty->"", f-oo->"Append(bar, sss)", ba_z->"123", + bing->"a b c", baz->"a b c d='a b' e" + + The first token is optional, if the line started with a key-value pair then + FirstValue() will be empty. + + Note: it can parse value fields with space inside them only if they are free of the '=' + character. If values are going to contain the '=' character, you need to quote them + with either single or double quotes. + + Key values may contain -_a-zA-Z0-9, but must begin with a-zA-Z_. + */ +class ConfigLine { + public: + // Tries to parse the line as a config-file line. Returns false + // if it could not for some reason, e.g. parsing failure. In most cases + // prints no warnings; the user should do this. Does not expect comments. + bool ParseLine(const std::string &line); + + // the GetValue functions are overloaded for various types. They return true + // if the key exists with value that can be converted to that type, and false + // otherwise. They also mark the key-value pair as having been read. It is + // not an error to read values twice. + bool GetValue(const std::string &key, std::string *value); + bool GetValue(const std::string &key, BaseFloat *value); + bool GetValue(const std::string &key, int32 *value); + // Values may be separated by ":" or by ",". + bool GetValue(const std::string &key, std::vector *value); + bool GetValue(const std::string &key, bool *value); + + bool HasUnusedValues() const; + /// returns e.g. 
foo=bar xxx=yyy if foo and xxx were not consumed by one + /// of the GetValue() functions. + std::string UnusedValues() const; + + const std::string &FirstToken() const { return first_token_; } + + const std::string WholeLine() { return whole_line_; } + // use default assignment operator and copy constructor. + private: + std::string whole_line_; + // the first token of the line, e.g. if line is + // foo-bar baz=bing + // then first_token_ would be "foo-bar". + std::string first_token_; + + // data_ maps from key to (value, is-this-value-consumed?). + std::map > data_; + +}; + +/// This function is like ExpectToken but for two tokens, and it will either +/// accept token1 and then token2, or just token2. This is useful in Read +/// functions where the first token may already have been consumed. +void ExpectOneOrTwoTokens(std::istream &is, bool binary, + const std::string &token1, + const std::string &token2); + + +/** + This function reads in a config file and *appends* its contents to a vector of + lines; it is responsible for removing comments (anything after '#') and + stripping out any lines that contain only whitespace after comment removal. + */ +void ReadConfigLines(std::istream &is, + std::vector *lines); + + +/** + This function converts config-lines from a simple sequence of strings + as output by ReadConfigLines(), into a sequence of first-tokens and + name-value pairs. The general format is: + "command-type bar=baz xx=yyy" + etc., although there are subtleties as to what exactly is allowed, see + documentation for class ConfigLine for details. + This function will die if there was a parsing failure. + */ +void ParseConfigLines(const std::vector &lines, + std::vector *config_lines); + + +/// Returns true if 'name' would be a valid name for a component or node in a +/// nnet3Nnet. This is a nonempty string beginning with A-Za-z_, and containing only +/// '-', '_', '.', A-Z, a-z, or 0-9. +bool IsValidName(const std::string &name); } // namespace kaldi From fcd70d3a7ec3e9aec26484109ded31ae1773a840 Mon Sep 17 00:00:00 2001 From: saikiranvalluri <41471921+saikiranvalluri@users.noreply.github.com> Date: Sat, 16 Mar 2019 20:54:24 +0530 Subject: [PATCH 083/235] [scripts] Simplify text encoding in RNNLM scripts (now only support utf-8) (#3065) --- scripts/rnnlm/choose_features.py | 10 +++------- scripts/rnnlm/get_best_model.py | 4 ++-- scripts/rnnlm/get_embedding_dim.py | 2 +- scripts/rnnlm/get_special_symbol_opts.py | 6 +++--- scripts/rnnlm/get_unigram_probs.py | 14 +++++++------- scripts/rnnlm/get_vocab.py | 8 ++++---- scripts/rnnlm/get_word_features.py | 14 +++++++------- scripts/rnnlm/prepare_split_data.py | 12 ++++++------ scripts/rnnlm/rnnlm_cleanup.py | 2 +- scripts/rnnlm/show_word_features.py | 17 ++++++----------- scripts/rnnlm/validate_features.py | 6 +++--- scripts/rnnlm/validate_text_dir.py | 10 +++++----- scripts/rnnlm/validate_word_features.py | 10 +++++----- 13 files changed, 53 insertions(+), 62 deletions(-) diff --git a/scripts/rnnlm/choose_features.py b/scripts/rnnlm/choose_features.py index c6621e04494..595c1d85bc1 100755 --- a/scripts/rnnlm/choose_features.py +++ b/scripts/rnnlm/choose_features.py @@ -10,12 +10,8 @@ from collections import defaultdict sys.stdout = open(1, 'w', encoding='utf-8', closefd=False) -# because this script splits inside words, we cannot use latin-1; we actually need to know what -# what the encoding is. By default we make this utf-8; to handle encodings that are not compatible -# with utf-8 (e.g. 
gbk), we'll eventually have to make the encoding an option to this script. - import re -tab_or_space = re.compile('[ \t]+') + parser = argparse.ArgumentParser(description="This script chooses the sparse feature representation of words. " "To be more specific, it chooses the set of features-- you compute " @@ -92,7 +88,7 @@ def read_vocab(vocab_file): vocab = {} with open(vocab_file, 'r', encoding="utf-8") as f: for line in f: - fields = re.split(tab_or_space, line) + fields = line.split() assert len(fields) == 2 if fields[0] in vocab: sys.exit(sys.argv[0] + ": duplicated word({0}) in vocab: {1}" @@ -121,7 +117,7 @@ def read_unigram_probs(unigram_probs_file): unigram_probs = [] with open(unigram_probs_file, 'r', encoding="utf-8") as f: for line in f: - fields = re.split(tab_or_space, line) + fields = line.split() assert len(fields) == 2 idx = int(fields[0]) if idx >= len(unigram_probs): diff --git a/scripts/rnnlm/get_best_model.py b/scripts/rnnlm/get_best_model.py index 333ed8dbfc7..ed266346e06 100755 --- a/scripts/rnnlm/get_best_model.py +++ b/scripts/rnnlm/get_best_model.py @@ -21,7 +21,7 @@ num_iters = None try: - with open(args.rnnlm_dir + "/info.txt", encoding="latin-1") as f: + with open(args.rnnlm_dir + "/info.txt", encoding="utf-8") as f: for line in f: a = line.split("=") if a[0] == "num_iters": @@ -40,7 +40,7 @@ for i in range(1, num_iters): this_logfile = "{0}/log/compute_prob.{1}.log".format(args.rnnlm_dir, i) try: - f = open(this_logfile, 'r', encoding='latin-1') + f = open(this_logfile, 'r', encoding='utf-8') except: sys.exit(sys.argv[0] + ": could not open log-file {0}".format(this_logfile)) this_objf = -1000 diff --git a/scripts/rnnlm/get_embedding_dim.py b/scripts/rnnlm/get_embedding_dim.py index 63eaf307498..1d516e0edf5 100755 --- a/scripts/rnnlm/get_embedding_dim.py +++ b/scripts/rnnlm/get_embedding_dim.py @@ -45,7 +45,7 @@ left_context=0 right_context=0 for line in out_lines: - line = line.decode('latin-1') + line = line.decode('utf-8') m = re.search(r'input-node name=input dim=(\d+)', line) if m is not None: try: diff --git a/scripts/rnnlm/get_special_symbol_opts.py b/scripts/rnnlm/get_special_symbol_opts.py index 4310b116ad7..7ee0ca54c9a 100755 --- a/scripts/rnnlm/get_special_symbol_opts.py +++ b/scripts/rnnlm/get_special_symbol_opts.py @@ -9,7 +9,7 @@ import sys import re -tab_or_space = re.compile('[ \t]+') + parser = argparse.ArgumentParser(description="This script checks whether the special symbols " "appear in words.txt with expected values, if not, it will " @@ -28,9 +28,9 @@ lower_ids = {} upper_ids = {} -input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='latin-1') +input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') for line in input_stream: - fields = re.split(tab_or_space, line) + fields = line.split() assert(len(fields) == 2) sym = fields[0] if sym in special_symbols: diff --git a/scripts/rnnlm/get_unigram_probs.py b/scripts/rnnlm/get_unigram_probs.py index ab3f9bb382f..e3189b26a92 100755 --- a/scripts/rnnlm/get_unigram_probs.py +++ b/scripts/rnnlm/get_unigram_probs.py @@ -8,7 +8,7 @@ import sys import re -tab_or_space = re.compile('[ \t]+') + parser = argparse.ArgumentParser(description="This script gets the unigram probabilities of words.", epilog="E.g. 
" + sys.argv[0] + " --vocab-file=data/rnnlm/vocab/words.txt " @@ -77,10 +77,10 @@ def get_all_data_sources_except_dev(text_dir): # value is a tuple (repeated_times_per_epoch, weight) def read_data_weights(weights_file, data_sources): data_weights = {} - with open(weights_file, 'r', encoding="latin-1") as f: + with open(weights_file, 'r', encoding="utf-8") as f: for line in f: try: - fields = re.split(tab_or_space, line) + fields = line.split() assert len(fields) == 3 if fields[0] in data_weights: raise Exception("duplicated data source({0}) specified in " @@ -102,9 +102,9 @@ def read_data_weights(weights_file, data_sources): # return the vocab, which is a dict mapping the word to a integer id. def read_vocab(vocab_file): vocab = {} - with open(vocab_file, 'r', encoding="latin-1") as f: + with open(vocab_file, 'r', encoding="utf-8") as f: for line in f: - fields = re.split(tab_or_space, line) + fields = line.split() assert len(fields) == 2 if fields[0] in vocab: sys.exit(sys.argv[0] + ": duplicated word({0}) in vocab: {1}" @@ -131,9 +131,9 @@ def get_counts(data_sources, data_weights, vocab): if weight == 0.0: continue - with open(counts_file, 'r', encoding="latin-1") as f: + with open(counts_file, 'r', encoding="utf-8") as f: for line in f: - fields = re.split(tab_or_space, line) + fields = line.split() if len(fields) != 2: print("Warning, should be 2 cols:", fields, line, file=sys.stderr); assert(len(fields) == 2) word = fields[0] diff --git a/scripts/rnnlm/get_vocab.py b/scripts/rnnlm/get_vocab.py index 1502e915f9c..baafcb3a131 100755 --- a/scripts/rnnlm/get_vocab.py +++ b/scripts/rnnlm/get_vocab.py @@ -6,10 +6,10 @@ import os import argparse import sys -sys.stdout = open(1, 'w', encoding='latin-1', closefd=False) +sys.stdout = open(1, 'w', encoding='utf-8', closefd=False) import re -tab_or_space = re.compile('[ \t]+') + parser = argparse.ArgumentParser(description="This script get a vocab from unigram counts " "of words produced by get_unigram_counts.sh", @@ -28,10 +28,10 @@ # Add the count for every word in counts_file # the result is written into word_counts def add_counts(word_counts, counts_file): - with open(counts_file, 'r', encoding="latin-1") as f: + with open(counts_file, 'r', encoding="utf-8") as f: for line in f: line = line.strip(" \t\r\n") - word_and_count = re.split(tab_or_space, line) + word_and_count = line.split() assert len(word_and_count) == 2 if word_and_count[0] in word_counts: word_counts[word_and_count[0]] += int(word_and_count[1]) diff --git a/scripts/rnnlm/get_word_features.py b/scripts/rnnlm/get_word_features.py index aeb7a3ec6ae..cdcc0a77734 100755 --- a/scripts/rnnlm/get_word_features.py +++ b/scripts/rnnlm/get_word_features.py @@ -10,7 +10,7 @@ from collections import defaultdict import re -tab_or_space = re.compile('[ \t]+') + parser = argparse.ArgumentParser(description="This script turns the words into the sparse feature representation, " "using features from rnnlm/choose_features.py.", @@ -41,9 +41,9 @@ # return the vocab, which is a dict mapping the word to a integer id. 
def read_vocab(vocab_file): vocab = {} - with open(vocab_file, 'r', encoding="latin-1") as f: + with open(vocab_file, 'r', encoding="utf-8") as f: for line in f: - fields = re.split(tab_or_space, line) + fields = line.split() assert len(fields) == 2 if fields[0] in vocab: sys.exit(sys.argv[0] + ": duplicated word({0}) in vocab: {1}" @@ -62,9 +62,9 @@ def read_vocab(vocab_file): # return a list of unigram_probs, indexed by word id def read_unigram_probs(unigram_probs_file): unigram_probs = [] - with open(unigram_probs_file, 'r', encoding="latin-1") as f: + with open(unigram_probs_file, 'r', encoding="utf-8") as f: for line in f: - fields = re.split(tab_or_space, line) + fields = line.split() assert len(fields) == 2 idx = int(fields[0]) if idx >= len(unigram_probs): @@ -103,9 +103,9 @@ def read_features(features_file): feats['min_ngram_order'] = 10000 feats['max_ngram_order'] = -1 - with open(features_file, 'r', encoding="latin-1") as f: + with open(features_file, 'r', encoding="utf-8") as f: for line in f: - fields = re.split(tab_or_space, line) + fields = line.split() assert(len(fields) in [3, 4, 5]) feat_id = int(fields[0]) diff --git a/scripts/rnnlm/prepare_split_data.py b/scripts/rnnlm/prepare_split_data.py index cceac48313e..427f043df98 100755 --- a/scripts/rnnlm/prepare_split_data.py +++ b/scripts/rnnlm/prepare_split_data.py @@ -9,7 +9,7 @@ import sys import re -tab_or_space = re.compile('[ \t]+') + parser = argparse.ArgumentParser(description="This script prepares files containing integerized text, " "for consumption by nnet3-get-egs.", @@ -66,10 +66,10 @@ def get_all_data_sources_except_dev(text_dir): # value is a tuple (repeated_times_per_epoch, weight) def read_data_weights(weights_file, data_sources): data_weights = {} - with open(weights_file, 'r', encoding="latin-1") as f: + with open(weights_file, 'r', encoding="utf-8") as f: for line in f: try: - fields = re.split(tab_or_space, line) + fields = line.split() assert len(fields) == 3 if fields[0] in data_weights: raise Exception("duplicated data source({0}) specified in " @@ -97,7 +97,7 @@ def distribute_to_outputs(source_filename, weight, output_filehandles): num_outputs = len(output_filehandles) n = 0 try: - f = open(source_filename, 'r', encoding="latin-1") + f = open(source_filename, 'r', encoding="utf-8") except Exception as e: sys.exit(sys.argv[0] + ": failed to open file {0} for reading: {1} ".format( source_filename, str(e))) @@ -124,7 +124,7 @@ def distribute_to_outputs(source_filename, weight, output_filehandles): os.makedirs(args.split_dir + "/info") # set up the 'num_splits' file, which contains an integer. -with open("{0}/info/num_splits".format(args.split_dir), 'w', encoding="latin-1") as f: +with open("{0}/info/num_splits".format(args.split_dir), 'w', encoding="utf-8") as f: print(args.num_splits, file=f) # e.g. set temp_files = [ 'foo/1.tmp', 'foo/2.tmp', ..., 'foo/5.tmp' ] @@ -136,7 +136,7 @@ def distribute_to_outputs(source_filename, weight, output_filehandles): temp_filehandles = [] for fname in temp_files: try: - temp_filehandles.append(open(fname, 'w', encoding="latin-1")) + temp_filehandles.append(open(fname, 'w', encoding="utf-8")) except Exception as e: sys.exit(sys.argv[0] + ": failed to open file: " + str(e) + ".. 
if this is a max-open-filehandles limitation, you may " diff --git a/scripts/rnnlm/rnnlm_cleanup.py b/scripts/rnnlm/rnnlm_cleanup.py index 40cbee7a496..6a304f7f4cb 100644 --- a/scripts/rnnlm/rnnlm_cleanup.py +++ b/scripts/rnnlm/rnnlm_cleanup.py @@ -69,7 +69,7 @@ def get_compute_prob_info(log_file): compute_prob_done = False # roughly based on code in get_best_model.py try: - f = open(log_file, "r", encoding="latin-1") + f = open(log_file, "r", encoding="utf-8") except: print(script_name + ": warning: compute_prob log not found for iteration " + str(iter) + ". Skipping", diff --git a/scripts/rnnlm/show_word_features.py b/scripts/rnnlm/show_word_features.py index 89b134adaf9..4335caed5d8 100755 --- a/scripts/rnnlm/show_word_features.py +++ b/scripts/rnnlm/show_word_features.py @@ -7,15 +7,10 @@ import argparse import sys -# The use of latin-1 encoding does not preclude reading utf-8. latin-1 encoding -# means "treat words as sequences of bytes", and it is compatible with utf-8 -# encoding as well as other encodings such as gbk, as long as the spaces are -# also spaces in ascii (which we check). It is basically how we emulate the -# behavior of python before python3. -sys.stdout = open(1, 'w', encoding='latin-1', closefd=False) +sys.stdout = open(1, 'w', encoding='utf-8', closefd=False) import re -tab_or_space = re.compile('[ \t]+') + parser = argparse.ArgumentParser(description="This script turns the word features to a human readable format.", epilog="E.g. " + sys.argv[0] + "exp/rnnlm/word_feats.txt exp/rnnlm/features.txt " @@ -36,9 +31,9 @@ def read_feature_type_and_key(features_file): feat_types = {} - with open(features_file, 'r', encoding="latin-1") as f: + with open(features_file, 'r', encoding="utf-8") as f: for line in f: - fields = re.split(tab_or_space, line) + fields = line.split() assert(len(fields) in [2, 3, 4]) feat_id = int(fields[0]) @@ -53,9 +48,9 @@ def read_feature_type_and_key(features_file): feat_type_and_key = read_feature_type_and_key(args.features_file) num_word_feats = 0 -with open(args.word_features_file, 'r', encoding="latin-1") as f: +with open(args.word_features_file, 'r', encoding="utf-8") as f: for line in f: - fields = re.split(tab_or_space, line) + fields = line.split() assert len(fields) % 2 == 1 print(int(fields[0]), end='\t') diff --git a/scripts/rnnlm/validate_features.py b/scripts/rnnlm/validate_features.py index 2a077da4758..e67f03207bb 100755 --- a/scripts/rnnlm/validate_features.py +++ b/scripts/rnnlm/validate_features.py @@ -8,7 +8,7 @@ import sys import re -tab_or_space = re.compile('[ \t]+') + parser = argparse.ArgumentParser(description="Validates features file, produced by rnnlm/choose_features.py.", epilog="E.g. 
" + sys.argv[0] + " exp/rnnlm/features.txt", @@ -24,7 +24,7 @@ if not os.path.isfile(args.features_file): sys.exit(sys.argv[0] + ": Expected file {0} to exist".format(args.features_file)) -with open(args.features_file, 'r', encoding="latin-1") as f: +with open(args.features_file, 'r', encoding="utf-8") as f: has_unigram = False has_length = False idx = 0 @@ -33,7 +33,7 @@ final_feats = {} word_feats = {} for line in f: - fields = re.split(tab_or_space, line) + fields = line.split() assert(len(fields) in [3, 4, 5]) assert idx == int(fields[0]) diff --git a/scripts/rnnlm/validate_text_dir.py b/scripts/rnnlm/validate_text_dir.py index 903e720bdf4..1f250d4c2f8 100755 --- a/scripts/rnnlm/validate_text_dir.py +++ b/scripts/rnnlm/validate_text_dir.py @@ -8,7 +8,7 @@ import sys import re -tab_or_space = re.compile('[ \t]+') + parser = argparse.ArgumentParser(description="Validates data directory containing text " "files from one or more data sources, including dev.txt.", @@ -40,7 +40,7 @@ def check_text_file(text_file): - with open(text_file, 'r', encoding="latin-1") as f: + with open(text_file, 'r', encoding="utf-8") as f: found_nonempty_line = False lineno = 0 if args.allow_internal_eos == 'true': @@ -54,7 +54,7 @@ def check_text_file(text_file): lineno += 1 if args.spot_check == 'true' and lineno > 10: break - words = re.split(tab_or_space, line) + words = line.split() if len(words) != 0: found_nonempty_line = True for word in words: @@ -76,9 +76,9 @@ def check_text_file(text_file): # with some kind of utterance-id first_field_set = set() other_fields_set = set() - with open(text_file, 'r', encoding="latin-1") as f: + with open(text_file, 'r', encoding="utf-8") as f: for line in f: - array = re.split(tab_or_space, line) + array = line.split() if len(array) > 0: first_word = array[0] if first_word in first_field_set or first_word in other_fields_set: diff --git a/scripts/rnnlm/validate_word_features.py b/scripts/rnnlm/validate_word_features.py index 205b934ae1b..372286d8d12 100755 --- a/scripts/rnnlm/validate_word_features.py +++ b/scripts/rnnlm/validate_word_features.py @@ -8,7 +8,7 @@ import sys import re -tab_or_space = re.compile('[ \t]+') + parser = argparse.ArgumentParser(description="Validates word features file, produced by rnnlm/get_word_features.py.", epilog="E.g. 
" + sys.argv[0] + " --features-file=exp/rnnlm/features.txt " @@ -28,9 +28,9 @@ unigram_feat_id = -1 length_feat_id = -1 max_feat_id = -1 -with open(args.features_file, 'r', encoding="latin-1") as f: +with open(args.features_file, 'r', encoding="utf-8") as f: for line in f: - fields = re.split(tab_or_space, line) + fields = line.split() assert(len(fields) in [3, 4, 5]) feat_id = int(fields[0]) @@ -52,9 +52,9 @@ if feat_id > max_feat_id: max_feat_id = feat_id -with open(args.word_features_file, 'r', encoding="latin-1") as f: +with open(args.word_features_file, 'r', encoding="utf-8") as f: for line in f: - fields = re.split(tab_or_space, line) + fields = line.split() assert len(fields) > 0 and len(fields) % 2 == 1 word_id = int(fields[0]) From b4c7ab60e925372b9639d27b51e3cb84088b8588 Mon Sep 17 00:00:00 2001 From: yfliao Date: Sat, 16 Mar 2019 23:25:45 +0800 Subject: [PATCH 084/235] [egs] Add "formosa_speech" recipe (Taiwanese Mandarin ASR) (#2474) --- egs/formosa/README.txt | 22 ++ egs/formosa/s5/RESULTS | 43 ++++ egs/formosa/s5/cmd.sh | 27 +++ egs/formosa/s5/conf/decode.config | 5 + egs/formosa/s5/conf/mfcc.conf | 2 + egs/formosa/s5/conf/mfcc_hires.conf | 10 + egs/formosa/s5/conf/online_cmvn.conf | 1 + egs/formosa/s5/conf/pitch.conf | 1 + egs/formosa/s5/local/chain/run_tdnn.sh | 1 + .../s5/local/chain/tuning/run_tdnn_1a.sh | 181 +++++++++++++++ .../s5/local/chain/tuning/run_tdnn_1b.sh | 188 +++++++++++++++ .../s5/local/chain/tuning/run_tdnn_1c.sh | 191 +++++++++++++++ .../s5/local/chain/tuning/run_tdnn_1d.sh | 190 +++++++++++++++ .../s5/local/nnet3/run_ivector_common.sh | 145 ++++++++++++ egs/formosa/s5/local/nnet3/run_tdnn.sh | 113 +++++++++ egs/formosa/s5/local/prepare_data.sh | 60 +++++ egs/formosa/s5/local/prepare_dict.sh | 55 +++++ egs/formosa/s5/local/prepare_lm.sh | 42 ++++ .../s5/local/run_cleanup_segmentation.sh | 66 ++++++ egs/formosa/s5/local/score.sh | 8 + egs/formosa/s5/local/train_lms.sh | 63 +++++ egs/formosa/s5/local/wer_hyp_filter | 19 ++ egs/formosa/s5/local/wer_output_filter | 25 ++ egs/formosa/s5/local/wer_ref_filter | 19 ++ egs/formosa/s5/path.sh | 6 + egs/formosa/s5/run.sh | 217 ++++++++++++++++++ egs/formosa/s5/steps | 1 + egs/formosa/s5/utils | 1 + 28 files changed, 1702 insertions(+) create mode 100644 egs/formosa/README.txt create mode 100644 egs/formosa/s5/RESULTS create mode 100755 egs/formosa/s5/cmd.sh create mode 100644 egs/formosa/s5/conf/decode.config create mode 100644 egs/formosa/s5/conf/mfcc.conf create mode 100644 egs/formosa/s5/conf/mfcc_hires.conf create mode 100644 egs/formosa/s5/conf/online_cmvn.conf create mode 100644 egs/formosa/s5/conf/pitch.conf create mode 120000 egs/formosa/s5/local/chain/run_tdnn.sh create mode 100755 egs/formosa/s5/local/chain/tuning/run_tdnn_1a.sh create mode 100755 egs/formosa/s5/local/chain/tuning/run_tdnn_1b.sh create mode 100755 egs/formosa/s5/local/chain/tuning/run_tdnn_1c.sh create mode 100755 egs/formosa/s5/local/chain/tuning/run_tdnn_1d.sh create mode 100755 egs/formosa/s5/local/nnet3/run_ivector_common.sh create mode 100755 egs/formosa/s5/local/nnet3/run_tdnn.sh create mode 100755 egs/formosa/s5/local/prepare_data.sh create mode 100755 egs/formosa/s5/local/prepare_dict.sh create mode 100755 egs/formosa/s5/local/prepare_lm.sh create mode 100755 egs/formosa/s5/local/run_cleanup_segmentation.sh create mode 100755 egs/formosa/s5/local/score.sh create mode 100755 egs/formosa/s5/local/train_lms.sh create mode 100755 egs/formosa/s5/local/wer_hyp_filter create mode 100755 egs/formosa/s5/local/wer_output_filter create mode 
100755 egs/formosa/s5/local/wer_ref_filter
 create mode 100755 egs/formosa/s5/path.sh
 create mode 100755 egs/formosa/s5/run.sh
 create mode 120000 egs/formosa/s5/steps
 create mode 120000 egs/formosa/s5/utils
diff --git a/egs/formosa/README.txt b/egs/formosa/README.txt
new file mode 100644
index 00000000000..3b9d78dad92
--- /dev/null
+++ b/egs/formosa/README.txt
@@ -0,0 +1,22 @@
+### Welcome to the demo recipe of the Formosa Speech in the Wild (FSW) Project ###
+
+The language habits of Taiwanese people are different from those of other Mandarin speakers (in both accent and culture) [1]. In particular, Taiwanese use traditional Chinese characters (i.e., 繁體中文). To address this issue, a Taiwanese speech corpus collection project, "Formosa Speech in the Wild (FSW)", was initiated in 2017 to improve the development of Taiwanese-specific speech recognition techniques.
+
+The FSW corpus will be a large-scale database of real-life, multi-genre Taiwanese spontaneous speech collected and transcribed from various sources (radio, TV, open courses, etc.). To demonstrate that this database is a reasonable data resource for Taiwanese spontaneous speech recognition research, a baseline recipe is provided here for everybody, especially students, to develop their own systems easily and quickly.
+
+This recipe is based on the "NER-Trs-Vol1" corpus (about 150 hours of broadcast radio speech selected from FSW). For more details, please visit:
+* Formosa Speech in the Wild (FSW) project (https://sites.google.com/speech.ntut.edu.tw/fsw)
+
+If you want to apply for the NER-Trs-Vol1 corpus, please contact Yuan-Fu Liao (廖元甫) via "yfliao@mail.ntut.edu.tw". This corpus is only for non-commercial research/education use and will be distributed via our GitLab server at https://speech.nchc.org.tw.
+
+Any bugs, errors, comments or suggestions are very welcome.
+
+Yuan-Fu Liao (廖元甫)
+Associate Professor
+Department of Electronic Engineering,
+National Taipei University of Technology
+http://www.ntut.edu.tw/~yfliao
+yfliao@mail.ntut.edu.tw
+
+............
+[1] The languages of Taiwan consist of several varieties under the Austronesian and Sino-Tibetan language families. Taiwanese Mandarin, Hokkien, Hakka and the Formosan languages are used by 83.5%, 81.9%, 6.6% and 1.4% of the population respectively (2010). Given the prevalent use of Taiwanese Hokkien, the Mandarin spoken in Taiwan has been influenced by it to a great extent.
diff --git a/egs/formosa/s5/RESULTS b/egs/formosa/s5/RESULTS new file mode 100644 index 00000000000..b047e5cefe4 --- /dev/null +++ b/egs/formosa/s5/RESULTS @@ -0,0 +1,43 @@ +# +# Reference results +# +# Experimental settings: +# +# training set: show CS, BG, DA, QG, SR, SY and WK, in total 18977 utt., 1,088,948 words +# test set: show JZ, GJ, KX and YX, in total 2112 utt., 135,972 words +# eval set: show JX, TD and WJ, in total 2222 utt., 104,648 words +# +# lexicon: 274,036 words +# phones (IPA): 196 (tonal) +# + +# WER: test + +%WER 61.32 [ 83373 / 135972, 5458 ins, 19156 del, 58759 sub ] exp/mono/decode_test/wer_11_0.0 +%WER 41.00 [ 55742 / 135972, 6725 ins, 12763 del, 36254 sub ] exp/tri1/decode_test/wer_15_0.0 +%WER 40.41 [ 54948 / 135972, 7366 ins, 11505 del, 36077 sub ] exp/tri2/decode_test/wer_14_0.0 +%WER 38.67 [ 52574 / 135972, 6855 ins, 11250 del, 34469 sub ] exp/tri3a/decode_test/wer_15_0.0 +%WER 35.70 [ 48546 / 135972, 7197 ins, 9717 del, 31632 sub ] exp/tri4a/decode_test/wer_17_0.0 +%WER 32.11 [ 43661 / 135972, 6112 ins, 10185 del, 27364 sub ] exp/tri5a/decode_test/wer_17_0.5 +%WER 31.36 [ 42639 / 135972, 6846 ins, 8860 del, 26933 sub ] exp/tri5a_cleaned/decode_test/wer_17_0.5 +%WER 24.43 [ 33218 / 135972, 5524 ins, 7583 del, 20111 sub ] exp/nnet3/tdnn_sp/decode_test/wer_12_0.0 +%WER 23.95 [ 32568 / 135972, 4457 ins, 10271 del, 17840 sub ] exp/chain/tdnn_1a_sp/decode_test/wer_10_0.0 +%WER 23.54 [ 32006 / 135972, 4717 ins, 8644 del, 18645 sub ] exp/chain/tdnn_1b_sp/decode_test/wer_10_0.0 +%WER 20.64 [ 28067 / 135972, 4434 ins, 7946 del, 15687 sub ] exp/chain/tdnn_1c_sp/decode_test/wer_11_0.0 +%WER 20.98 [ 28527 / 135972, 4706 ins, 7816 del, 16005 sub ] exp/chain/tdnn_1d_sp/decode_test/wer_10_0.0 + +# CER: test + +%WER 54.09 [ 116688 / 215718, 4747 ins, 24510 del, 87431 sub ] exp/mono/decode_test/cer_10_0.0 +%WER 32.61 [ 70336 / 215718, 5866 ins, 16282 del, 48188 sub ] exp/tri1/decode_test/cer_13_0.0 +%WER 32.10 [ 69238 / 215718, 6186 ins, 15772 del, 47280 sub ] exp/tri2/decode_test/cer_13_0.0 +%WER 30.40 [ 65583 / 215718, 6729 ins, 13115 del, 45739 sub ] exp/tri3a/decode_test/cer_12_0.0 +%WER 27.53 [ 59389 / 215718, 6311 ins, 13008 del, 40070 sub ] exp/tri4a/decode_test/cer_15_0.0 +%WER 24.21 [ 52232 / 215718, 6425 ins, 11543 del, 34264 sub ] exp/tri5a/decode_test/cer_15_0.0 +%WER 23.41 [ 50492 / 215718, 6645 ins, 10997 del, 32850 sub ] exp/tri5a_cleaned/decode_test/cer_17_0.0 +%WER 17.07 [ 36829 / 215718, 4734 ins, 9938 del, 22157 sub ] exp/nnet3/tdnn_sp/decode_test/cer_12_0.0 +%WER 16.83 [ 36305 / 215718, 4772 ins, 10810 del, 20723 sub ] exp/chain/tdnn_1a_sp/decode_test/cer_9_0.0 +%WER 16.44 [ 35459 / 215718, 4216 ins, 11278 del, 19965 sub ] exp/chain/tdnn_1b_sp/decode_test/cer_10_0.0 +%WER 13.72 [ 29605 / 215718, 4678 ins, 8066 del, 16861 sub ] exp/chain/tdnn_1c_sp/decode_test/cer_10_0.0 +%WER 14.08 [ 30364 / 215718, 5182 ins, 7588 del, 17594 sub ] exp/chain/tdnn_1d_sp/decode_test/cer_9_0.0 + diff --git a/egs/formosa/s5/cmd.sh b/egs/formosa/s5/cmd.sh new file mode 100755 index 00000000000..66ae9090820 --- /dev/null +++ b/egs/formosa/s5/cmd.sh @@ -0,0 +1,27 @@ +# "queue.pl" uses qsub. The options to it are +# options to qsub. If you have GridEngine installed, +# change this to a queue you have access to. +# Otherwise, use "run.pl", which will run jobs locally +# (make sure your --num-jobs options are no more than +# the number of cpus on your machine. 
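+# (Side note for readers: these variables are read by the steps/ and utils/
+# scripts, which invoke them roughly as
+#   "$train_cmd JOB=1:$nj exp/foo/log/bar.JOB.log some-kaldi-program args",
+# where exp/foo and bar are placeholders; any wrapper that follows queue.pl's
+# calling convention can be substituted here.)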
+
+# Run locally:
+#export train_cmd=run.pl
+#export decode_cmd=run.pl
+
+# JHU cluster (or most clusters using GridEngine, with a suitable
+# conf/queue.conf).
+export train_cmd="queue.pl"
+export decode_cmd="queue.pl --mem 4G"
+
+host=$(hostname -f)
+if [ ${host#*.} == "fit.vutbr.cz" ]; then
+  queue_conf=$HOME/queue_conf/default.conf # see example /homes/kazi/iveselyk/queue_conf/default.conf,
+  export train_cmd="queue.pl --config $queue_conf --mem 2G --matylda 0.2"
+  export decode_cmd="queue.pl --config $queue_conf --mem 3G --matylda 0.1"
+  export cuda_cmd="queue.pl --config $queue_conf --gpu 1 --mem 10G --tmp 40G"
+elif [ ${host#*.} == "cm.cluster" ]; then
+  # MARCC bluecrab cluster:
+  export train_cmd="slurm.pl --time 4:00:00 "
+  export decode_cmd="slurm.pl --mem 4G --time 4:00:00 "
+fi
diff --git a/egs/formosa/s5/conf/decode.config b/egs/formosa/s5/conf/decode.config
new file mode 100644
index 00000000000..d91f86183af
--- /dev/null
+++ b/egs/formosa/s5/conf/decode.config
@@ -0,0 +1,5 @@
+beam=11.0 # beam for decoding. Was 13.0 in the scripts.
+first_beam=8.0 # beam for 1st-pass decoding in SAT.
+
+
+
diff --git a/egs/formosa/s5/conf/mfcc.conf b/egs/formosa/s5/conf/mfcc.conf
new file mode 100644
index 00000000000..a1aa3d6c158
--- /dev/null
+++ b/egs/formosa/s5/conf/mfcc.conf
@@ -0,0 +1,2 @@
+--use-energy=false # only non-default option.
+--sample-frequency=16000
diff --git a/egs/formosa/s5/conf/mfcc_hires.conf b/egs/formosa/s5/conf/mfcc_hires.conf
new file mode 100644
index 00000000000..ca067e77b37
--- /dev/null
+++ b/egs/formosa/s5/conf/mfcc_hires.conf
@@ -0,0 +1,10 @@
+# config for high-resolution MFCC features, intended for neural network training.
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why
+# we prefer this method.
+--use-energy=false # use average of log energy, not energy.
+--sample-frequency=16000 # the data is sampled at 16kHz.
+--num-mel-bins=40 # similar to Google's setup.
+--num-ceps=40 # there is no dimensionality reduction.
+--low-freq=40 # low cutoff frequency for mel bins
+--high-freq=-200 # high cutoff frequency, relative to the Nyquist of 8000 (=7800)
diff --git a/egs/formosa/s5/conf/online_cmvn.conf b/egs/formosa/s5/conf/online_cmvn.conf
new file mode 100644
index 00000000000..591367e7ae9
--- /dev/null
+++ b/egs/formosa/s5/conf/online_cmvn.conf
@@ -0,0 +1 @@
+# configuration file for apply-cmvn-online, used when invoking online2-wav-nnet3-latgen-faster.
diff --git a/egs/formosa/s5/conf/pitch.conf b/egs/formosa/s5/conf/pitch.conf
new file mode 100644
index 00000000000..e959a19d5b8
--- /dev/null
+++ b/egs/formosa/s5/conf/pitch.conf
@@ -0,0 +1 @@
+--sample-frequency=16000
diff --git a/egs/formosa/s5/local/chain/run_tdnn.sh b/egs/formosa/s5/local/chain/run_tdnn.sh
new file mode 120000
index 00000000000..e1adaa9346d
--- /dev/null
+++ b/egs/formosa/s5/local/chain/run_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_1d.sh
\ No newline at end of file
diff --git a/egs/formosa/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/formosa/s5/local/chain/tuning/run_tdnn_1a.sh
new file mode 100755
index 00000000000..d52644a66d1
--- /dev/null
+++ b/egs/formosa/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -0,0 +1,181 @@
+#!/bin/bash
+
+# This script is based on run_tdnn_7h.sh in the swbd chain recipe.
+ +set -e + +# configs for 'chain' +affix=1a +stage=0 +train_stage=-10 +get_egs_stage=-10 +dir=exp/chain/tdnn # Note: _sp will get added to this +decode_iter= + +# training options +num_epochs=4 +initial_effective_lrate=0.001 +final_effective_lrate=0.0001 +max_param_change=2.0 +final_layer_normalize_target=0.5 +num_jobs_initial=2 +num_jobs_final=12 +minibatch_size=128 +frames_per_eg=150,110,90 +remove_egs=false +common_egs_dir= +xent_regularize=0.1 + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 9 ]; then + # Build a tree using our new topology. This is the critically different + # step compared with other recipes. + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 5000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 10 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=43 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=625 + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=625 + relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=625 + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=625 + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=625 + relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=625 + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=625 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
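+  # As a quick worked example of the factor above (illustration only, using
+  # the default xent_regularize=0.1 set at the top of this script):
+  # learning_rate_factor = 0.5 / 0.1 = 5.0, i.e. the xent output layer is
+  # trained with a 5x larger learning rate to compensate for the 0.1 scale
+  # applied to its objective.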
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=625 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 11 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_eg \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.num-jobs-initial $num_jobs_initial \ + --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ + --trainer.optimization.final-effective-lrate $final_effective_lrate \ + --trainer.max-param-change $max_param_change \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri5a_sp_lats \ + --use-gpu wait \ + --dir $dir || exit 1; +fi + +if [ $stage -le 12 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $dir/graph +fi + +graph_dir=$dir/graph +if [ $stage -le 13 ]; then + for test_set in test eval; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 10 --cmd "$decode_cmd" \ + --online-ivector-dir exp/nnet3/ivectors_$test_set \ + $graph_dir data/${test_set}_hires $dir/decode_${test_set} || exit 1; + done + wait; +fi + +exit 0; diff --git a/egs/formosa/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/formosa/s5/local/chain/tuning/run_tdnn_1b.sh new file mode 100755 index 00000000000..0134e63bce2 --- /dev/null +++ b/egs/formosa/s5/local/chain/tuning/run_tdnn_1b.sh @@ -0,0 +1,188 @@ +#!/bin/bash + +# This script shows improvement arising from data cleaning. + +# CER: +# %WER 16.83 [ 36305 / 215718, 4772 ins, 10810 del, 20723 sub ] exp/chain/tdnn_1a_sp/decode_test/cer_9_0.0 +# %WER 16.44 [ 35459 / 215718, 4216 ins, 11278 del, 19965 sub ] exp/chain/tdnn_1b_sp/decode_test/cer_10_0.0 + +# steps/info/chain_dir_info.pl exp/chain/tdnn_1b_sp +# exp/chain/tdnn_1b_sp: num-iters=133 nj=2..12 num-params=12.5M dim=43+100->4528 combine=-0.073->-0.073 (over 2) xent:train/valid[87,132,final]=(-1.05,-0.964,-0.963/-1.10,-1.06,-1.05) logprob:train/valid[87,132,final]=(-0.079,-0.065,-0.065/-0.094,-0.092,-0.092) + +set -e + +# configs for 'chain' +affix=1b +nnet3_affix=_1b +stage=0 +train_stage=-10 +get_egs_stage=-10 +dir=exp/chain/tdnn # Note: _sp will get added to this +decode_iter= + +# training options +num_epochs=4 +initial_effective_lrate=0.001 +final_effective_lrate=0.0001 +max_param_change=2.0 +final_layer_normalize_target=0.5 +num_jobs_initial=2 +num_jobs_final=12 +minibatch_size=128 +frames_per_eg=150,110,90 +remove_egs=false +common_egs_dir= +xent_regularize=0.1 + +# End configuration section. 
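+# (Side note on the egs options above, for readers new to chain training: the
+# comma-separated frames_per_eg=150,110,90 lets the example generator cut
+# training chunks of roughly 150, 110 or 90 frames, so utterances of different
+# lengths can be packed with little padding; minibatch_size=128 is the number
+# of such chunks per minibatch.)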
+echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 9 ]; then + # Build a tree using our new topology. This is the critically different + # step compared with other recipes. + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 5000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 10 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=43 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=625 + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=625 + relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=625 + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=625 + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=625 + relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=625 + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=625 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=625 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 11 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_eg \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.num-jobs-initial $num_jobs_initial \ + --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ + --trainer.optimization.final-effective-lrate $final_effective_lrate \ + --trainer.max-param-change $max_param_change \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri5a_sp_lats \ + --use-gpu wait \ + --dir $dir || exit 1; +fi + +if [ $stage -le 12 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $dir/graph +fi + +graph_dir=$dir/graph +if [ $stage -le 13 ]; then + for test_set in test eval; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 10 --cmd "$decode_cmd" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_$test_set \ + $graph_dir data/${test_set}_hires $dir/decode_${test_set} || exit 1; + done + wait; +fi +exit 0; diff --git a/egs/formosa/s5/local/chain/tuning/run_tdnn_1c.sh b/egs/formosa/s5/local/chain/tuning/run_tdnn_1c.sh new file mode 100755 index 00000000000..36ea128fdde --- /dev/null +++ b/egs/formosa/s5/local/chain/tuning/run_tdnn_1c.sh @@ -0,0 +1,191 @@ +#!/bin/bash + +# CER: +# %WER 16.44 [ 35459 / 215718, 4216 ins, 11278 del, 19965 sub ] exp/chain/tdnn_1b_sp/decode_test/cer_10_0.0 +# %WER 13.72 [ 29605 / 215718, 4678 ins, 8066 del, 16861 sub ] exp/chain/tdnn_1c_sp/decode_test/cer_10_0.0 + +# steps/info/chain_dir_info.pl exp/chain/tdnn_1c_sp +# exp/chain/tdnn_1c_sp: num-iters=147 nj=3..16 num-params=17.9M dim=43+100->4528 combine=-0.041->-0.041 (over 2) xent:train/valid[97,146,final]=(-0.845,-0.625,-0.618/-0.901,-0.710,-0.703) logprob:train/valid[97,146,final]=(-0.064,-0.040,-0.039/-0.072,-0.058,-0.057) + +set -e + +# configs for 'chain' +affix=1c +nnet3_affix=_1b +stage=0 +train_stage=-10 +get_egs_stage=-10 +dir=exp/chain/tdnn # Note: _sp will get added to this +decode_iter= + +# training options +num_epochs=6 +initial_effective_lrate=0.00025 +final_effective_lrate=0.000025 +max_param_change=2.0 +final_layer_normalize_target=0.5 +num_jobs_initial=3 +num_jobs_final=16 +minibatch_size=64 +frames_per_eg=150,110,90 +remove_egs=false +common_egs_dir= +xent_regularize=0.1 +dropout_schedule='0,0@0.20,0.5@0.50,0' + +# End configuration section. 
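+# (Side note on the dropout_schedule string above, as this is its first use in
+# these tuning scripts: each entry is a dropout proportion, optionally tagged
+# with @fraction-of-training-data-seen, and values in between are interpolated
+# linearly. '0,0@0.20,0.5@0.50,0' therefore keeps dropout off for the first 20%
+# of training, ramps it up to 0.5 at the half-way point (e.g. roughly 0.25 once
+# 35% of the data has been seen), and ramps it back to 0 by the end.)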
+echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 9 ]; then + # Build a tree using our new topology. This is the critically different + # step compared with other recipes. + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 5000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 10 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true" + tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" + linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" + prefinal_opts="l2-regularize=0.01" + output_opts="l2-regularize=0.002" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=43 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=1536 + tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1 + tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1 + tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1 + tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=0 + tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf14 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf15 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + linear-component name=prefinal-l dim=256 $linear_opts + prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256 + output-layer name=output include-log-softmax=false dim=$num_targets $output_opts + prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 11 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3$nnet3_affix/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + 
--chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.0 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.add-option="--optimization.memory-compression-level=2" \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0 --constrained false" \ + --egs.chunk-width $frames_per_eg \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.num-jobs-initial $num_jobs_initial \ + --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ + --trainer.optimization.final-effective-lrate $final_effective_lrate \ + --trainer.max-param-change $max_param_change \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri5a_sp_lats \ + --use-gpu wait \ + --dir $dir || exit 1; +fi + +if [ $stage -le 12 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $dir/graph +fi + +graph_dir=$dir/graph +if [ $stage -le 13 ]; then + for test_set in test eval; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 10 --cmd "$decode_cmd" \ + --online-ivector-dir exp/nnet3${nnet3_affix:+_$nnet3_affix}/ivectors_$test_set \ + $graph_dir data/${test_set}_hires $dir/decode_${test_set} || exit 1; + done + wait; +fi + +exit 0; diff --git a/egs/formosa/s5/local/chain/tuning/run_tdnn_1d.sh b/egs/formosa/s5/local/chain/tuning/run_tdnn_1d.sh new file mode 100755 index 00000000000..be21f2402a9 --- /dev/null +++ b/egs/formosa/s5/local/chain/tuning/run_tdnn_1d.sh @@ -0,0 +1,190 @@ +#!/bin/bash + +# CER: +# 1a: %WER 16.83 [ 36305 / 215718, 4772 ins, 10810 del, 20723 sub ] exp/chain/tdnn_1a_sp/decode_test/cer_9_0.0 +# 1d: %WER 14.08 [ 30364 / 215718, 5182 ins, 7588 del, 17594 sub ] exp/chain/tdnn_1d_sp/decode_test/cer_9_0.0 + +# steps/info/chain_dir_info.pl exp/chain/tdnn_1d_sp +# exp/chain/tdnn_1d_sp: num-iters=157 nj=3..16 num-params=18.6M dim=43+100->5792 combine=-0.050->-0.050 (over 1) xent:train/valid[103,156,final]=(-0.977,-0.735,-0.725/-0.953,-0.772,-0.768) logprob:train/valid[103,156,final]=(-0.077,-0.052,-0.052/-0.079,-0.065,-0.066) + +set -e + +# configs for 'chain' +affix=1d +stage=0 +train_stage=-10 +get_egs_stage=-10 +dir=exp/chain/tdnn # Note: _sp will get added to this +decode_iter= + +# training options +num_epochs=6 +initial_effective_lrate=0.00025 +final_effective_lrate=0.000025 +max_param_change=2.0 +final_layer_normalize_target=0.5 +num_jobs_initial=3 +num_jobs_final=16 +minibatch_size=64 +frames_per_eg=150,110,90 +remove_egs=false +common_egs_dir= +xent_regularize=0.1 +dropout_schedule='0,0@0.20,0.5@0.50,0' + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 9 ]; then + # Build a tree using our new topology. This is the critically different + # step compared with other recipes. 
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 10 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true" + tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" + linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" + prefinal_opts="l2-regularize=0.01" + output_opts="l2-regularize=0.002" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=43 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=1536 + tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1 + tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1 + tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1 + tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=0 + tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf14 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf15 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + linear-component name=prefinal-l dim=256 $linear_opts + prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256 + output-layer name=output include-log-softmax=false dim=$num_targets $output_opts + prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 11 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3$nnet3_affix/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.0 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.dropout-schedule $dropout_schedule \ + 
--trainer.add-option="--optimization.memory-compression-level=2" \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0 --constrained false" \ + --egs.chunk-width $frames_per_eg \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.num-jobs-initial $num_jobs_initial \ + --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ + --trainer.optimization.final-effective-lrate $final_effective_lrate \ + --trainer.max-param-change $max_param_change \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri5a_sp_lats \ + --use-gpu wait \ + --dir $dir || exit 1; +fi + +if [ $stage -le 12 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $dir/graph +fi + +graph_dir=$dir/graph +if [ $stage -le 13 ]; then + for test_set in test eval; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 10 --cmd "$decode_cmd" \ + --online-ivector-dir exp/nnet3${nnet3_affix:+_$nnet3_affix}/ivectors_$test_set \ + $graph_dir data/${test_set}_hires $dir/decode_${test_set} || exit 1; + done + wait; +fi + +exit 0; diff --git a/egs/formosa/s5/local/nnet3/run_ivector_common.sh b/egs/formosa/s5/local/nnet3/run_ivector_common.sh new file mode 100755 index 00000000000..723589ddd2e --- /dev/null +++ b/egs/formosa/s5/local/nnet3/run_ivector_common.sh @@ -0,0 +1,145 @@ +#!/bin/bash + +set -euo pipefail + +# This script is modified based on mini_librispeech/s5/local/nnet3/run_ivector_common.sh + +# This script is called from local/nnet3/run_tdnn.sh and +# local/chain/run_tdnn.sh (and may eventually be called by more +# scripts). It contains the common feature preparation and +# iVector-related parts of the script. See those scripts for examples +# of usage. + +stage=0 +train_set=train +test_sets="test eval" +gmm=tri5a + +nnet3_affix= + +. ./cmd.sh +. ./path.sh +. utils/parse_options.sh + +gmm_dir=exp/${gmm} +ali_dir=exp/${gmm}_sp_ali + +for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do + if [ ! -f $f ]; then + echo "$0: expected file $f to exist" + exit 1 + fi +done + +if [ $stage -le 1 ]; then + # Although the nnet will be trained by high resolution data, we still have to + # perturb the normal data to get the alignment _sp stands for speed-perturbed + echo "$0: preparing directory for low-resolution speed-perturbed data (for alignment)" + utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp + echo "$0: making MFCC features for low-resolution speed-perturbed data" + steps/make_mfcc_pitch.sh --cmd "$train_cmd" --nj 70 data/${train_set}_sp \ + exp/make_mfcc/${train_set}_sp mfcc_perturbed || exit 1; + steps/compute_cmvn_stats.sh data/${train_set}_sp \ + exp/make_mfcc/${train_set}_sp mfcc_perturbed || exit 1; + utils/fix_data_dir.sh data/${train_set}_sp +fi + +if [ $stage -le 2 ]; then + echo "$0: aligning with the perturbed low-resolution data" + steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ + data/${train_set}_sp data/lang $gmm_dir $ali_dir || exit 1 +fi + +if [ $stage -le 3 ]; then + # Create high-resolution MFCC features (with 40 cepstra instead of 13). 
+ # this shows how you can split across multiple file-systems. + echo "$0: creating high-resolution MFCC features" + mfccdir=mfcc_perturbed_hires + + for datadir in ${train_set}_sp ${test_sets}; do + utils/copy_data_dir.sh data/$datadir data/${datadir}_hires + done + + # do volume-perturbation on the training data prior to extracting hires + # features; this helps make trained nnets more invariant to test data volume. + utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires || exit 1; + + for datadir in ${train_set}_sp ${test_sets}; do + steps/make_mfcc_pitch.sh --nj 10 --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1; + steps/compute_cmvn_stats.sh data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1; + utils/fix_data_dir.sh data/${datadir}_hires || exit 1; + # create MFCC data dir without pitch to extract iVector + utils/data/limit_feature_dim.sh 0:39 data/${datadir}_hires data/${datadir}_hires_nopitch || exit 1; + steps/compute_cmvn_stats.sh data/${datadir}_hires_nopitch exp/make_hires/$datadir $mfccdir || exit 1; + done +fi + +if [ $stage -le 4 ]; then + echo "$0: computing a subset of data to train the diagonal UBM." + # We'll use about a quarter of the data. + mkdir -p exp/nnet3${nnet3_affix}/diag_ubm + temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm + + num_utts_total=$(wc -l $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=43 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=850 + relu-batchnorm-layer name=tdnn2 dim=850 input=Append(-1,0,2) + relu-batchnorm-layer name=tdnn3 dim=850 input=Append(-3,0,3) + relu-batchnorm-layer name=tdnn4 dim=850 input=Append(-7,0,2) + relu-batchnorm-layer name=tdnn5 dim=850 input=Append(-3,0,3) + relu-batchnorm-layer name=tdnn6 dim=850 + output-layer name=output input=tdnn6 dim=$num_targets max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 8 ]; then + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.num-jobs-initial $num_jobs_initial \ + --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ + --trainer.optimization.final-effective-lrate $final_effective_lrate \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --cleanup.preserve-model-interval 500 \ + --use-gpu wait \ + --feat-dir=data/${train_set}_hires \ + --ali-dir $ali_dir \ + --lang data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 9 ]; then + # this version of the decoding treats each utterance separately + # without carrying forward speaker information. 
+ + for decode_set in test eval; do + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + decode_dir=${dir}/decode_$decode_set + steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires $decode_dir || exit 1; + done + wait; +fi + +exit 0; diff --git a/egs/formosa/s5/local/prepare_data.sh b/egs/formosa/s5/local/prepare_data.sh new file mode 100755 index 00000000000..68f342e1549 --- /dev/null +++ b/egs/formosa/s5/local/prepare_data.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# Copyright 2015-2016 Sarah Flora Juan +# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal) +# Copyright 2018 Yuan-Fu Liao, National Taipei University of Technology +# AsusTek Computer Inc. (Author: Alex Hung) + +# Apache 2.0 + +set -e -o pipefail + +train_dir=NER-Trs-Vol1/Train +eval_dir=NER-Trs-Vol1-Eval +eval_key_dir=NER-Trs-Vol1-Eval-Key + +. ./path.sh +. parse_options.sh + +for x in $train_dir $eval_dir; do + if [ ! -d "$x" ] ; then + echo >&2 "The directory $x does not exist" + fi +done + +if [ -z "$(command -v dos2unix 2>/dev/null)" ]; then + echo "dos2unix not found on PATH. Please install it manually." + exit 1; +fi + +# have to remvoe previous files to avoid filtering speakers according to cmvn.scp and feats.scp +rm -rf data/all data/train data/test data/eval data/local/train +mkdir -p data/all data/train data/test data/eval data/local/train + + +# make utt2spk, wav.scp and text +find $train_dir -name *.wav -exec sh -c 'x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $y' \; | dos2unix > data/all/utt2spk +find $train_dir -name *.wav -exec sh -c 'x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $x' \; | dos2unix > data/all/wav.scp +find $train_dir -name *.txt -exec sh -c 'x={}; y=$(basename -s .txt $x); printf "%s " $y; cat $x' \; | dos2unix > data/all/text + +# fix_data_dir.sh fixes common mistakes (unsorted entries in wav.scp, +# duplicate entries and so on). Also, it regenerates the spk2utt from +# utt2spk +utils/fix_data_dir.sh data/all + +echo "Preparing train and test data" +# test set: JZ, GJ, KX, YX +grep -E "(JZ|GJ|KX|YX)_" data/all/utt2spk | awk '{print $1}' > data/all/cv.spk +utils/subset_data_dir_tr_cv.sh --cv-spk-list data/all/cv.spk data/all data/train data/test + +# for LM training +echo "cp data/train/text data/local/train/text for language model training" +cat data/train/text | awk '{$1=""}1;' | awk '{$1=$1}1;' > data/local/train/text + +# preparing EVAL set. +find $eval_dir -name *.wav -exec sh -c 'x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $y' \; | dos2unix > data/eval/utt2spk +find $eval_dir -name *.wav -exec sh -c 'x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $x' \; | dos2unix > data/eval/wav.scp +find $eval_key_dir -name *.txt -exec sh -c 'x={}; y=$(basename -s .txt $x); printf "%s " $y; cat $x' \; | dos2unix > data/eval/text +utils/fix_data_dir.sh data/eval + +echo "Data preparation completed." 
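+# (Optional sanity check, not part of the original recipe: after this script
+# finishes, the standard validator can be run on the freshly created
+# directories, e.g.
+#   utils/validate_data_dir.sh --no-feats data/train
+# which verifies that wav.scp, text and utt2spk are consistently sorted and
+# cross-referenced before features are extracted.)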
+exit 0; diff --git a/egs/formosa/s5/local/prepare_dict.sh b/egs/formosa/s5/local/prepare_dict.sh new file mode 100755 index 00000000000..4e580f5f6e8 --- /dev/null +++ b/egs/formosa/s5/local/prepare_dict.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# Copyright 2015-2016 Sarah Flora Juan +# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal) +# Copyright 2018 Yuan-Fu Liao, National Taipei University of Technology +# Apache 2.0 + +source_dir=NER-Trs-Vol1/Language +dict_dir=data/local/dict +rm -rf $dict_dir +mkdir -p $dict_dir + +# +# +# +rm -f $dict_dir/lexicon.txt +touch $dict_dir/lexicon.txt +cat $source_dir/lexicon.txt > $dict_dir/lexicon.txt +echo " SIL" >> $dict_dir/lexicon.txt + +# +# define silence phone +# +rm -f $dict_dir/silence_phones.txt +touch $dict_dir/silence_phones.txt + +echo "SIL" > $dict_dir/silence_phones.txt + +# +# find nonsilence phones +# +rm -f $dict_dir/nonsilence_phones.txt +touch $dict_dir/nonsilence_phones.txt + +cat $source_dir/lexicon.txt | grep -v -F -f $dict_dir/silence_phones.txt | \ + perl -ane 'print join("\n", @F[1..$#F]) . "\n"; ' | \ + sort -u > $dict_dir/nonsilence_phones.txt + +# +# add optional silence phones +# + +rm -f $dict_dir/optional_silence.txt +touch $dict_dir/optional_silence.txt +echo "SIL" > $dict_dir/optional_silence.txt + +# +# extra questions +# +rm -f $dict_dir/extra_questions.txt +touch $dict_dir/extra_questions.txt +cat $dict_dir/silence_phones.txt | awk '{printf("%s ", $1);} END{printf "\n";}' > $dict_dir/extra_questions.txt || exit 1; +cat $dict_dir/nonsilence_phones.txt | awk '{printf("%s ", $1);} END{printf "\n";}' >> $dict_dir/extra_questions.txt || exit 1; + +echo "Dictionary preparation succeeded" +exit 0; diff --git a/egs/formosa/s5/local/prepare_lm.sh b/egs/formosa/s5/local/prepare_lm.sh new file mode 100755 index 00000000000..59fe1529658 --- /dev/null +++ b/egs/formosa/s5/local/prepare_lm.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# Copyright 2015-2016 Sarah Flora Juan +# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal) +# Apache 2.0 + +set -e -o pipefail + +# To create G.fst from ARPA language model +. 
./path.sh || die "path.sh expected"; + +local/train_lms_srilm.sh --train-text data/train/text data/ data/srilm + +#nl -nrz -w10 corpus/LM/iban-bp-2012.txt | utils/shuffle_list.pl > data/local/external_text +local/train_lms_srilm.sh --train-text data/local/external_text data/ data/srilm_external + +# let's do ngram interpolation of the previous two LMs +# the lm.gz is always symlink to the model with the best perplexity, so we use that + +mkdir -p data/srilm_interp +for w in 0.9 0.8 0.7 0.6 0.5; do + ngram -lm data/srilm/lm.gz -mix-lm data/srilm_external/lm.gz \ + -lambda $w -write-lm data/srilm_interp/lm.${w}.gz + echo -n "data/srilm_interp/lm.${w}.gz " + ngram -lm data/srilm_interp/lm.${w}.gz -ppl data/srilm/dev.txt | paste -s - +done | sort -k15,15g > data/srilm_interp/perplexities.txt + +# for basic decoding, let's use only a trigram LM +[ -d data/lang_test/ ] && rm -rf data/lang_test +cp -R data/lang data/lang_test +lm=$(cat data/srilm/perplexities.txt | grep 3gram | head -n1 | awk '{print $1}') +local/arpa2G.sh $lm data/lang_test data/lang_test + +# for decoding using bigger LM let's find which interpolated gave the most improvement +[ -d data/lang_big ] && rm -rf data/lang_big +cp -R data/lang data/lang_big +lm=$(cat data/srilm_interp/perplexities.txt | head -n1 | awk '{print $1}') +local/arpa2G.sh $lm data/lang_big data/lang_big + +# for really big lm, we should only decode using small LM +# and resocre using the big lm +utils/build_const_arpa_lm.sh $lm data/lang_big data/lang_big +exit 0; diff --git a/egs/formosa/s5/local/run_cleanup_segmentation.sh b/egs/formosa/s5/local/run_cleanup_segmentation.sh new file mode 100755 index 00000000000..b72cd89b4d1 --- /dev/null +++ b/egs/formosa/s5/local/run_cleanup_segmentation.sh @@ -0,0 +1,66 @@ +#!/bin/bash + +# Copyright 2016 Vimal Manohar +# 2016 Johns Hopkins University (author: Daniel Povey) +# 2017 Nagendra Kumar Goel +# 2019 AsusTek Computer Inc. (author: Alex Hung) +# Apache 2.0 + +# This script demonstrates how to re-segment training data selecting only the +# "good" audio that matches the transcripts. +# The basic idea is to decode with an existing in-domain acoustic model, and a +# biased language model built from the reference, and then work out the +# segmentation from a ctm like file. + +# For nnet3 and chain results after cleanup, see the scripts in +# local/nnet3/run_tdnn.sh and local/chain/run_tdnn.sh + +# GMM Results for speaker-independent (SI) and speaker adaptive training (SAT) systems on dev and test sets +# [will add these later]. + +set -e +set -o pipefail +set -u + +stage=0 +cleanup_stage=0 +data=data/train +cleanup_affix=cleaned +srcdir=exp/tri5a +langdir=data/lang_test +nj=20 +decode_nj=20 +decode_num_threads=1 + +. ./cmd.sh +if [ -f ./path.sh ]; then . ./path.sh; fi +. utils/parse_options.sh + +cleaned_data=${data}_${cleanup_affix} + +dir=${srcdir}_${cleanup_affix}_work +cleaned_dir=${srcdir}_${cleanup_affix} + +if [ $stage -le 1 ]; then + # This does the actual data cleanup. 
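+  # (For orientation, assuming the defaults above: $dir ends up holding the
+  # intermediate biased-LM decoding and ctm output of the cleanup, while
+  # $cleaned_data is written as a normal Kaldi data directory containing only
+  # the segments whose decoding matched the reference well enough to keep.)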
+  steps/cleanup/clean_and_segment_data.sh --stage $cleanup_stage \
+    --nj $nj --cmd "$train_cmd" \
+    $data $langdir $srcdir $dir $cleaned_data
+fi
+
+if [ $stage -le 2 ]; then
+  steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \
+    $cleaned_data $langdir $srcdir ${srcdir}_ali_${cleanup_affix}
+fi
+
+if [ $stage -le 3 ]; then
+  steps/train_sat.sh --cmd "$train_cmd" \
+    3500 100000 $cleaned_data $langdir ${srcdir}_ali_${cleanup_affix} ${cleaned_dir}
+fi
+
+utils/data/get_utt2dur.sh data/train_cleaned
+ori_avg_dur=$(awk 'BEGIN{total=0}{total += $2}END{printf("%.2f", total/NR)}' ${data}/utt2dur)
+new_avg_dur=$(awk 'BEGIN{total=0}{total += $2}END{printf("%.2f", total/NR)}' ${cleaned_data}/utt2dur)
+echo "average duration was reduced from ${ori_avg_dur}s to ${new_avg_dur}s."
+# average duration was reduced from 21.68s to 10.97s.
+exit 0;
diff --git a/egs/formosa/s5/local/score.sh b/egs/formosa/s5/local/score.sh
new file mode 100755
index 00000000000..a9786169973
--- /dev/null
+++ b/egs/formosa/s5/local/score.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+set -e -o pipefail
+set -x
+steps/score_kaldi.sh "$@"
+steps/scoring/score_kaldi_cer.sh --stage 2 "$@"
+
+echo "$0: Done"
diff --git a/egs/formosa/s5/local/train_lms.sh b/egs/formosa/s5/local/train_lms.sh
new file mode 100755
index 00000000000..efc5b92c573
--- /dev/null
+++ b/egs/formosa/s5/local/train_lms.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+
+# To be run from one directory above this script.
+. ./path.sh
+
+text=data/local/train/text
+lexicon=data/local/dict/lexicon.txt
+
+for f in "$text" "$lexicon"; do
+  [ ! -f $f ] && echo "$0: No such file $f" && exit 1;
+done
+
+# This script takes no arguments. It assumes you have already run
+# local/prepare_data.sh.
+# It takes as input the files
+# data/local/train/text
+# data/local/dict/lexicon.txt
+dir=data/local/lm
+mkdir -p $dir
+
+kaldi_lm=`which train_lm.sh`
+if [ -z $kaldi_lm ]; then
+  echo "$0: train_lm.sh is not found. That might mean it's not installed"
+  echo "$0: or it is not added to PATH"
+  echo "$0: Use the script tools/extra/install_kaldi_lm.sh to install it"
+  exit 1
+fi
+
+cleantext=$dir/text.no_oov
+
+cat $text | awk -v lex=$lexicon 'BEGIN{while((getline<lex) >0){ seen[$1]=1; } }
+  {for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf(" ");} } printf("\n");}' \
+  > $cleantext || exit 1;
+
+cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \
+  sort -nr > $dir/word.counts || exit 1;
+
+# Get counts from acoustic training transcripts, and add one-count
+# for each word in the lexicon (but not silence, we don't want it
+# in the LM-- we'll add it optionally later).
+cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \
+  cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \
+  sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1;
+
+# note: we probably won't really make use of the OOV symbol, as there aren't any OOVs
+cat $dir/unigram.counts | awk '{print $2}' | get_word_map.pl "" "" "" > $dir/word_map \
+  || exit 1;
+
+# note: ignore 1st field of train.txt, it's the utterance-id.
+cat $cleantext | awk -v wmap=$dir/word_map 'BEGIN{while((getline<wmap)>0)map[$1]=$2;}
+  { for(n=2;n<=NF;n++) { printf map[$n]; if(n<NF){ printf(" "); } else { print ""; }}}' | gzip -c >$dir/train.gz \
+  || exit 1;
+
+train_lm.sh --arpa --lmtype 3gram-mincount $dir || exit 1;
+
+# LM is small enough that we don't need to prune it (only about 0.7M N-grams).
+# Perplexity over 128254.000000 words is 90.446690
+
+# note: output is
+# data/local/lm/3gram-mincount/lm_unpruned.gz
+
+exit 0;
diff --git a/egs/formosa/s5/local/wer_hyp_filter b/egs/formosa/s5/local/wer_hyp_filter
new file mode 100755
index 00000000000..519d92ee80d
--- /dev/null
+++ b/egs/formosa/s5/local/wer_hyp_filter
@@ -0,0 +1,19 @@
+#!/usr/bin/env perl
+
+@filters=('');
+
+foreach $w (@filters) {
+  $bad{$w} = 1;
+}
+
+while(<STDIN>) {
+  @A = split(" ", $_);
+  $id = shift @A;
+  print "$id ";
+  foreach $a (@A) {
+    if (!defined $bad{$a}) {
+      print "$a ";
+    }
+  }
+  print "\n";
+}
diff --git a/egs/formosa/s5/local/wer_output_filter b/egs/formosa/s5/local/wer_output_filter
new file mode 100755
index 00000000000..06a99a43e34
--- /dev/null
+++ b/egs/formosa/s5/local/wer_output_filter
@@ -0,0 +1,25 @@
+#!/usr/bin/env perl
+# Copyright 2012-2014  Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0
+use utf8;
+
+use open qw(:encoding(utf8));
+binmode STDIN, ":utf8";
+binmode STDOUT, ":utf8";
+binmode STDERR, ":utf8";
+
+while (<>) {
+  @F = split " ";
+  print $F[0] . " ";
+  foreach $s (@F[1..$#F]) {
+    if (($s =~ /\[.*\]/) || ($s =~ /\<.*\>/) || ($s =~ "")) {
+      print "";
+    } else {
+      print "$s"
+    }
+    print " ";
+  }
+  print "\n";
+}
+
+
diff --git a/egs/formosa/s5/local/wer_ref_filter b/egs/formosa/s5/local/wer_ref_filter
new file mode 100755
index 00000000000..519d92ee80d
--- /dev/null
+++ b/egs/formosa/s5/local/wer_ref_filter
@@ -0,0 +1,19 @@
+#!/usr/bin/env perl
+
+@filters=('');
+
+foreach $w (@filters) {
+  $bad{$w} = 1;
+}
+
+while(<STDIN>) {
+  @A = split(" ", $_);
+  $id = shift @A;
+  print "$id ";
+  foreach $a (@A) {
+    if (!defined $bad{$a}) {
+      print "$a ";
+    }
+  }
+  print "\n";
+}
diff --git a/egs/formosa/s5/path.sh b/egs/formosa/s5/path.sh
new file mode 100755
index 00000000000..2d17b17a84a
--- /dev/null
+++ b/egs/formosa/s5/path.sh
@@ -0,0 +1,6 @@
+export KALDI_ROOT=`pwd`/../../..
+[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
+[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
+. $KALDI_ROOT/tools/config/common_path.sh
+export LC_ALL=C
diff --git a/egs/formosa/s5/run.sh b/egs/formosa/s5/run.sh
new file mode 100755
index 00000000000..a4d0f2dcd1d
--- /dev/null
+++ b/egs/formosa/s5/run.sh
@@ -0,0 +1,217 @@
+#!/bin/bash
+#
+# Copyright 2018, Yuan-Fu Liao, National Taipei University of Technology, yfliao@mail.ntut.edu.tw
+#
+# Before you run this recipe, please apply for the corpus, download it, and put it (or a symlink to it) under this folder (folder name: "NER-Trs-Vol1").
+# For more detail, please check:
+# 1. Formosa Speech in the Wild (FSW) project (https://sites.google.com/speech.ntut.edu.tw/fsw/home/corpus)
+# 2. Formosa Speech Recognition Challenge (FSR) 2018 (https://sites.google.com/speech.ntut.edu.tw/fsw/home/challenge)
+stage=-2
+num_jobs=20
+
+train_dir=NER-Trs-Vol1/Train
+eval_dir=NER-Trs-Vol1-Eval
+eval_key_dir=NER-Trs-Vol1-Eval-Key
+
+# shell options
+set -eo pipefail
+
+. ./cmd.sh
+. 
./utils/parse_options.sh + +# configure number of jobs running in parallel, you should adjust these numbers according to your machines +# data preparation +if [ $stage -le -2 ]; then + # Lexicon Preparation, + echo "$0: Lexicon Preparation" + local/prepare_dict.sh || exit 1; + + # Data Preparation + echo "$0: Data Preparation" + local/prepare_data.sh --train-dir $train_dir --eval-dir $eval_dir --eval-key-dir $eval_key_dir || exit 1; + + # Phone Sets, questions, L compilation + echo "$0: Phone Sets, questions, L compilation Preparation" + rm -rf data/lang + utils/prepare_lang.sh --position-dependent-phones false data/local/dict \ + "" data/local/lang data/lang || exit 1; + + # LM training + echo "$0: LM training" + rm -rf data/local/lm/3gram-mincount + local/train_lms.sh || exit 1; + + # G compilation, check LG composition + echo "$0: G compilation, check LG composition" + utils/format_lm.sh data/lang data/local/lm/3gram-mincount/lm_unpruned.gz \ + data/local/dict/lexicon.txt data/lang_test || exit 1; + +fi + +# Now make MFCC plus pitch features. +# mfccdir should be some place with a largish disk where you +# want to store MFCC features. +mfccdir=mfcc + +# mfcc +if [ $stage -le -1 ]; then + echo "$0: making mfccs" + for x in train test eval; do + steps/make_mfcc_pitch.sh --cmd "$train_cmd" --nj $num_jobs data/$x exp/make_mfcc/$x $mfccdir || exit 1; + steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1; + utils/fix_data_dir.sh data/$x || exit 1; + done +fi + +# mono +if [ $stage -le 0 ]; then + echo "$0: train mono model" + # Make some small data subsets for early system-build stages. + echo "$0: make training subsets" + utils/subset_data_dir.sh --shortest data/train 3000 data/train_mono + + # train mono + steps/train_mono.sh --boost-silence 1.25 --cmd "$train_cmd" --nj $num_jobs \ + data/train_mono data/lang exp/mono || exit 1; + + # Get alignments from monophone system. 
+  steps/align_si.sh --boost-silence 1.25 --cmd "$train_cmd" --nj $num_jobs \
+    data/train data/lang exp/mono exp/mono_ali || exit 1;
+
+  # Monophone decoding
+  (
+  utils/mkgraph.sh data/lang_test exp/mono exp/mono/graph || exit 1;
+  steps/decode.sh --cmd "$decode_cmd" --config conf/decode.config --nj $num_jobs \
+    exp/mono/graph data/test exp/mono/decode_test
+  )&
+fi
+
+# tri1
+if [ $stage -le 1 ]; then
+  echo "$0: train tri1 model"
+  # train tri1 [first triphone pass]
+  steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \
+    2500 20000 data/train data/lang exp/mono_ali exp/tri1 || exit 1;
+
+  # align tri1
+  steps/align_si.sh --cmd "$train_cmd" --nj $num_jobs \
+    data/train data/lang exp/tri1 exp/tri1_ali || exit 1;
+
+  # decode tri1
+  (
+  utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph || exit 1;
+  steps/decode.sh --cmd "$decode_cmd" --config conf/decode.config --nj $num_jobs \
+    exp/tri1/graph data/test exp/tri1/decode_test
+  )&
+fi
+
+# tri2
+if [ $stage -le 2 ]; then
+  echo "$0: train tri2 model"
+  # train tri2 [delta+delta-deltas]
+  steps/train_deltas.sh --cmd "$train_cmd" \
+    2500 20000 data/train data/lang exp/tri1_ali exp/tri2 || exit 1;
+
+  # align tri2
+  steps/align_si.sh --cmd "$train_cmd" --nj $num_jobs \
+    data/train data/lang exp/tri2 exp/tri2_ali || exit 1;
+
+  # decode tri2
+  (
+  utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph
+  steps/decode.sh --cmd "$decode_cmd" --config conf/decode.config --nj $num_jobs \
+    exp/tri2/graph data/test exp/tri2/decode_test
+  )&
+fi
+
+# tri3a
+if [ $stage -le 3 ]; then
+  echo "$0: train tri3 model"
+  # Train tri3a, which is LDA+MLLT,
+  steps/train_lda_mllt.sh --cmd "$train_cmd" \
+    2500 20000 data/train data/lang exp/tri2_ali exp/tri3a || exit 1;
+
+  # decode tri3a
+  (
+  utils/mkgraph.sh data/lang_test exp/tri3a exp/tri3a/graph || exit 1;
+  steps/decode.sh --cmd "$decode_cmd" --nj $num_jobs --config conf/decode.config \
+    exp/tri3a/graph data/test exp/tri3a/decode_test
+  )&
+fi
+
+# tri4
+if [ $stage -le 4 ]; then
+  echo "$0: train tri4 model"
+  # From now, we start building a more serious system (with SAT), and we'll
+  # do the alignment with fMLLR.
+  steps/align_fmllr.sh --cmd "$train_cmd" --nj $num_jobs \
+    data/train data/lang exp/tri3a exp/tri3a_ali || exit 1;
+
+  steps/train_sat.sh --cmd "$train_cmd" \
+    2500 20000 data/train data/lang exp/tri3a_ali exp/tri4a || exit 1;
+
+  # align tri4a
+  steps/align_fmllr.sh --cmd "$train_cmd" --nj $num_jobs \
+    data/train data/lang exp/tri4a exp/tri4a_ali
+
+  # decode tri4a
+  (
+  utils/mkgraph.sh data/lang_test exp/tri4a exp/tri4a/graph
+  steps/decode_fmllr.sh --cmd "$decode_cmd" --nj $num_jobs --config conf/decode.config \
+    exp/tri4a/graph data/test exp/tri4a/decode_test
+  )&
+fi
+
+# tri5
+if [ $stage -le 5 ]; then
+  echo "$0: train tri5 model"
+  # Building a larger SAT system.
+ steps/train_sat.sh --cmd "$train_cmd" \ + 3500 100000 data/train data/lang exp/tri4a_ali exp/tri5a || exit 1; + + # align tri5a + steps/align_fmllr.sh --cmd "$train_cmd" --nj $num_jobs \ + data/train data/lang exp/tri5a exp/tri5a_ali || exit 1; + + # decode tri5 + ( + utils/mkgraph.sh data/lang_test exp/tri5a exp/tri5a/graph || exit 1; + steps/decode_fmllr.sh --cmd "$decode_cmd" --nj $num_jobs --config conf/decode.config \ + exp/tri5a/graph data/test exp/tri5a/decode_test || exit 1; + )& +fi + +# nnet3 tdnn models +# commented out by default, since the chain model is usually faster and better +#if [ $stage -le 6 ]; then + # echo "$0: train nnet3 model" + # local/nnet3/run_tdnn.sh +#fi + +# chain model +if [ $stage -le 7 ]; then + # The iVector-extraction and feature-dumping parts coulb be skipped by setting "--train_stage 7" + echo "$0: train chain model" + local/chain/run_tdnn.sh +fi + +# getting results (see RESULTS file) +if [ $stage -le 8 ]; then + echo "$0: extract the results" + for test_set in test eval; do + echo "WER: $test_set" + for x in exp/*/decode_${test_set}*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done 2>/dev/null + for x in exp/*/*/decode_${test_set}*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done 2>/dev/null + echo + + echo "CER: $test_set" + for x in exp/*/decode_${test_set}*; do [ -d $x ] && grep WER $x/cer_* | utils/best_wer.sh; done 2>/dev/null + for x in exp/*/*/decode_${test_set}*; do [ -d $x ] && grep WER $x/cer_* | utils/best_wer.sh; done 2>/dev/null + echo + done +fi + +# finish +echo "$0: all done" + +exit 0; diff --git a/egs/formosa/s5/steps b/egs/formosa/s5/steps new file mode 120000 index 00000000000..6e99bf5b5ad --- /dev/null +++ b/egs/formosa/s5/steps @@ -0,0 +1 @@ +../../wsj/s5/steps \ No newline at end of file diff --git a/egs/formosa/s5/utils b/egs/formosa/s5/utils new file mode 120000 index 00000000000..b240885218f --- /dev/null +++ b/egs/formosa/s5/utils @@ -0,0 +1 @@ +../../wsj/s5/utils \ No newline at end of file From 461b50c2c8d219c31eaa67fdb00587be0374a170 Mon Sep 17 00:00:00 2001 From: rickychanhoyin Date: Sat, 16 Mar 2019 23:26:51 +0800 Subject: [PATCH 085/235] [egs] python3 compatibility in csj example script (#3123) --- egs/csj/s5/local/chain/tuning/run_tdnn_1a.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/csj/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/csj/s5/local/chain/tuning/run_tdnn_1a.sh index a463db77066..75ceb80e3e0 100755 --- a/egs/csj/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/csj/s5/local/chain/tuning/run_tdnn_1a.sh @@ -133,7 +133,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig From 61637e6c8ab01d3b4c54a50d9b20781a0aa12a59 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 17 Mar 2019 11:35:40 -0400 Subject: [PATCH 086/235] [egs] python3 compatibility in example scripts (#3126) --- egs/aishell/s5/local/chain/tuning/run_tdnn_1a.sh | 2 +- egs/aishell/s5/local/chain/tuning/run_tdnn_2a.sh | 2 +- egs/aishell2/s5/local/chain/tuning/run_tdnn_1a.sh | 2 +- egs/aishell2/s5/local/chain/tuning/run_tdnn_1b.sh | 2 +- egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_1a.sh | 2 +- .../s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh | 2 +- 
.../s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1b.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1b.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1c.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_1e.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_1f.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_1g.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_1h.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_1i.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1b.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1c.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1d.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1e.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1f.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1g.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1h.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1k.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1n.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1o.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_bs_1a.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1a.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1b.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1c.sh | 2 +- egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh | 2 +- egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh | 2 +- egs/aspire/s5/local/chain/tuning/run_tdnn_lstm_1a.sh | 2 +- egs/babel/s5d/local/chain/tuning/run_tdnn.sh | 2 +- egs/babel/s5d/local/chain/tuning/run_tdnn_lstm.sh | 2 +- egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab1.sh | 2 +- egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab2.sh | 2 +- egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab3.sh | 2 +- egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab4.sh | 2 +- egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab5.sh | 2 +- egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab6.sh | 2 +- egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab7.sh | 2 +- egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab8.sh | 2 +- egs/bentham/v1/local/chain/tuning/run_cnn_e2eali_1a.sh | 2 +- egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_1a.sh | 2 +- egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_lstm_1a.sh | 2 +- egs/chime4/s5_1ch/local/chime4_calc_wers_looped.sh | 2 +- egs/chime4/s5_1ch/local/run_lmrescore_tdnn_lstm.sh | 2 +- egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh | 2 +- egs/commonvoice/s5/local/chain/tuning/run_tdnn_1a.sh | 2 +- egs/csj/s5/local/nnet/run_dnn_tandem_uc.sh | 2 +- egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh | 2 +- egs/fisher_english/s5/local/chain/run_tdnn.sh | 2 +- .../semisup/chain/tuning/run_tdnn_100k_semisupervised_1a.sh | 2 +- egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_1a.sh | 2 +- .../semisup/chain/tuning/run_tdnn_50k_semisupervised_1a.sh | 2 +- egs/fisher_swbd/s5/local/chain/run_blstm_6j.sh | 2 +- egs/fisher_swbd/s5/local/chain/run_tdnn_7c.sh | 2 +- egs/fisher_swbd/s5/local/chain/run_tdnn_7d.sh | 2 +- 
egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a.sh | 2 +- egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1b.sh | 2 +- egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1a.sh | 2 +- egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1b.sh | 2 +- egs/formosa/s5/local/chain/tuning/run_tdnn_1a.sh | 2 +- egs/formosa/s5/local/chain/tuning/run_tdnn_1b.sh | 2 +- egs/formosa/s5/local/chain/tuning/run_tdnn_1c.sh | 2 +- egs/formosa/s5/local/chain/tuning/run_tdnn_1d.sh | 2 +- egs/gale_arabic/s5/local/gale_format_data.sh | 2 +- egs/gale_arabic/s5/local/gale_train_lms.sh | 2 +- egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh | 2 +- egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh | 2 +- egs/gp/s1/local/gp_convert_audio.sh | 2 +- egs/gp/s1/utils/mkgraph.sh | 2 +- egs/heroico/s5/local/chain/tuning/run_cnn_tdnn_1a.sh | 2 +- egs/heroico/s5/local/chain/tuning/run_tdnn_1a.sh | 2 +- egs/heroico/s5/local/chain/tuning/run_tdnn_1b.sh | 2 +- egs/hkust/s5/local/chain/tuning/run_tdnn_2a.sh | 2 +- egs/hub4_spanish/s5/local/chain/tuning/run_cnn_tdnn_1a.sh | 2 +- egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1a.sh | 2 +- egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1b.sh | 2 +- egs/iam/v1/local/chain/tuning/run_cnn_1a.sh | 2 +- egs/iam/v1/local/chain/tuning/run_cnn_chainali_1a.sh | 2 +- egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh | 2 +- egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh | 2 +- egs/iam/v1/local/chain/tuning/run_cnn_chainali_1d.sh | 2 +- egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1a.sh | 2 +- egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh | 2 +- egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh | 2 +- egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh | 2 +- egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh | 2 +- egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh | 2 +- egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh | 2 +- egs/iban/s5/local/chain/tuning/run_tdnn_1a.sh | 2 +- egs/iban/s5/local/chain/tuning/run_tdnn_1b.sh | 2 +- egs/ifnenit/v1/local/chain/run_cnn_1a.sh | 2 +- egs/ifnenit/v1/local/chain/run_cnn_chainali_1a.sh | 2 +- egs/librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh | 2 +- egs/librispeech/s5/local/chain/tuning/run_tdnn_1b.sh | 2 +- egs/librispeech/s5/local/chain/tuning/run_tdnn_1c.sh | 2 +- egs/librispeech/s5/local/chain/tuning/run_tdnn_1d.sh | 2 +- egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh | 2 +- egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1b.sh | 2 +- egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh | 2 +- egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh | 2 +- egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh | 2 +- egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh | 2 +- egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh | 2 +- egs/madcat_zh/v1/local/chain/tuning/run_cnn_1a.sh | 2 +- egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1a.sh | 2 +- egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1b.sh | 2 +- egs/mini_librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh | 2 +- egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1a.sh | 2 +- egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1b.sh | 2 +- egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1c.sh | 2 +- egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1d.sh | 2 +- egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1e.sh | 2 +- egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1f.sh | 2 +- egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g.sh | 2 +- egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g20.sh | 2 +- 
egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1h.sh | 2 +- egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1a.sh | 2 +- egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1b.sh | 2 +- egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1c.sh | 2 +- egs/multi_en/s5/local/chain/tuning/run_tdnn_5b.sh | 2 +- egs/multi_en/s5/local/chain/tuning/run_tdnn_lstm_1a.sh | 2 +- egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1a.sh | 2 +- egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1b.sh | 2 +- egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh | 2 +- egs/reverb/s5/local/chain/tuning/run_tdnn_lstm_1a.sh | 2 +- egs/rimes/v1/local/chain/tuning/run_cnn_e2eali_1a.sh | 2 +- egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1a.sh | 2 +- egs/rm/s5/local/run_vtln2.sh | 2 +- egs/sprakbanken/s5/local/chain/tuning/run_lstm_1a.sh | 2 +- egs/sprakbanken/s5/local/chain/tuning/run_lstm_1b.sh | 2 +- egs/sprakbanken/s5/local/chain/tuning/run_lstm_1c.sh | 2 +- egs/sprakbanken/s5/local/chain/tuning/run_lstm_1d.sh | 2 +- egs/sprakbanken/s5/local/chain/tuning/run_lstm_1e.sh | 2 +- egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1b.sh | 2 +- egs/sprakbanken/s5/local/chain/tuning/run_tdnn_lstm_1a.sh | 2 +- egs/sprakbanken/s5/local/norm_dk/write_punct.sh | 2 +- egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7k.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_blstm_6j.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_blstm_6k.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_blstm_6l.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_blstm_6m.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_blstm_6n.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_blstm_6o.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_cnn_tdnn_1a.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_lstm_6k.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_lstm_6l.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_7h.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_7i.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_7k.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_7l.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_7m.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_7m25l.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_7n.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_7o.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_7p.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_7q.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_attention_1a.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1a.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1b.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1c.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1d.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1b.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1d.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1g.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1h.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1i.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1j.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1k.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1l.sh | 2 +- 
egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1m.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1n.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_opgru_1a.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_opgru_1b.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_blstm_1a.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1a.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1b.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1c.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1d.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1e.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1b.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1c.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1d.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1e.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1f.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1g.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1a.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1c.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1h.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1i.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1j.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1k.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1l.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1m.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1n.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1o.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1r.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1s.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1t.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1u.sh | 2 +- egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1v.sh | 2 +- .../s5_r2/local/chain/tuning/run_tdnn_lstm_attention_1a.sh | 2 +- .../s5_r2/local/chain/tuning/run_tdnn_lstm_attention_bs_1a.sh | 2 +- .../s5_r2/local/chain/tuning/run_tdnn_lstm_attention_bs_1b.sh | 2 +- egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1a.sh | 2 +- egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1b.sh | 2 +- egs/tedlium/s5_r3/local/ted_download_lm.sh | 2 +- egs/tunisian_msa/s5/local/chain/tuning/run_tdnn_1a.sh | 2 +- egs/uw3/v1/local/chain/run_cnn_1a.sh | 2 +- egs/vystadial_cz/s5b/local/chain/tuning/run_tdnn_1a.sh | 2 +- egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1a.sh | 2 +- egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1b.sh | 2 +- egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1c.sh | 2 +- egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh | 2 +- egs/wsj/s5/local/chain/tuning/run_tdnn_1b.sh | 2 +- egs/wsj/s5/local/chain/tuning/run_tdnn_1c.sh | 2 +- egs/wsj/s5/local/chain/tuning/run_tdnn_1d.sh | 2 +- egs/wsj/s5/local/chain/tuning/run_tdnn_1e.sh | 2 +- egs/wsj/s5/local/chain/tuning/run_tdnn_1f.sh | 2 +- egs/wsj/s5/local/chain/tuning/run_tdnn_lstm_1a.sh | 2 +- egs/wsj/s5/local/chain/tuning/run_tdnn_lstm_1b.sh | 2 +- egs/yomdle_fa/v1/local/chain/run_cnn_e2eali_1b.sh | 2 +- egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1a.sh | 2 +- egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1b.sh | 2 +- 
.../local/semisup/chain/run_cnn_chainali_semisupervised_1a.sh | 2 +- .../local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh | 2 +- egs/yomdle_russian/v1/local/chain/tuning/run_cnn_e2eali_1a.sh | 2 +- egs/yomdle_tamil/v1/local/chain/tuning/run_cnn_e2eali_1a.sh | 2 +- egs/yomdle_tamil/v1/local/chain/tuning/run_cnn_e2eali_1b.sh | 2 +- .../local/semisup/chain/run_cnn_chainali_semisupervised_1a.sh | 2 +- .../local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh | 2 +- egs/yomdle_zh/v1/local/chain/run_cnn_e2eali_1b.sh | 2 +- egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh | 2 +- egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh | 2 +- 256 files changed, 256 insertions(+), 256 deletions(-) diff --git a/egs/aishell/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/aishell/s5/local/chain/tuning/run_tdnn_1a.sh index a0b183e3c5a..b38fa4d9c7a 100755 --- a/egs/aishell/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/aishell/s5/local/chain/tuning/run_tdnn_1a.sh @@ -90,7 +90,7 @@ if [ $stage -le 10 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/aishell/s5/local/chain/tuning/run_tdnn_2a.sh b/egs/aishell/s5/local/chain/tuning/run_tdnn_2a.sh index 2ebe2a3092b..6b7223785d9 100755 --- a/egs/aishell/s5/local/chain/tuning/run_tdnn_2a.sh +++ b/egs/aishell/s5/local/chain/tuning/run_tdnn_2a.sh @@ -92,7 +92,7 @@ if [ $stage -le 10 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/aishell2/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/aishell2/s5/local/chain/tuning/run_tdnn_1a.sh index 459bd64eeb5..86c9becac5b 100755 --- a/egs/aishell2/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/aishell2/s5/local/chain/tuning/run_tdnn_1a.sh @@ -103,7 +103,7 @@ fi if [ $stage -le 10 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.002" linear_opts="orthonormal-constraint=1.0" output_opts="l2-regularize=0.0005 bottleneck-dim=256" diff --git a/egs/aishell2/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/aishell2/s5/local/chain/tuning/run_tdnn_1b.sh index ba2a4344349..d8560e63909 100755 --- a/egs/aishell2/s5/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/aishell2/s5/local/chain/tuning/run_tdnn_1b.sh @@ -150,7 +150,7 @@ if [ $stage -le 10 ]; then echo "$0: creating neural net configs using the xconfig parser"; feat_dim=$(feat-to-dim scp:data/${train_set}_hires/feats.scp -) num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.002" linear_opts="orthonormal-constraint=1.0" output_opts="l2-regularize=0.0005 bottleneck-dim=256" diff --git 
a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_1a.sh b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_1a.sh index 1fc641f1166..4d260e3c517 100755 --- a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_1a.sh +++ b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_1a.sh @@ -220,7 +220,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true" tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" diff --git a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh index a8494420b0d..3546b6a7ced 100755 --- a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh +++ b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh @@ -211,7 +211,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1b.sh b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1b.sh index a12e7efa7b9..1a839b045bd 100755 --- a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1b.sh +++ b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1b.sh @@ -235,7 +235,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) tdnn_opts="l2-regularize=0.006" lstm_opts="l2-regularize=0.0025 decay-time=20 dropout-proportion=0.0" output_opts="l2-regularize=0.001" diff --git a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh index 16d1f4044f5..d926c1dc6d7 100644 --- a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh +++ b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh @@ -184,7 +184,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" diff --git a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1b.sh b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1b.sh index 83e6a95582f..d9cd1c356e8 100644 --- a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1b.sh +++ b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1b.sh @@ -176,7 +176,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + 
learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20 dropout-proportion=0" diff --git a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1c.sh b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1c.sh index 387b4bfcc88..a0805b4f9f1 100755 --- a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1c.sh +++ b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1c.sh @@ -185,7 +185,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=40" diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh index 57108dbddae..997357b80a9 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh @@ -164,7 +164,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh index f87e1a12d36..4d062e65429 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh @@ -151,7 +151,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh index eb84a1cd876..387570388d0 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh @@ -163,7 +163,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1e.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1e.sh index e6592b667dc..0436b08cdc0 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1e.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1e.sh @@ -161,7 +161,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1f.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1f.sh index 8bf2b73dada..4ca526d63b8 100644 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1f.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1f.sh @@ -165,7 +165,7 @@ if [ $stage 
-le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1g.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1g.sh index dfb6dfedee7..baed760bb68 100644 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1g.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1g.sh @@ -166,7 +166,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1h.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1h.sh index 3e26a8b38bd..e721a858c0a 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1h.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1h.sh @@ -167,7 +167,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1i.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1i.sh index 1931127c86d..de40cb2d1a4 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1i.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1i.sh @@ -168,7 +168,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.02" output_opts="l2-regularize=0.004" diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh index d63712f1f0f..4f580b88f6b 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -171,7 +171,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1b.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1b.sh index a53785f45c2..904a079d7de 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1b.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1b.sh @@ -173,7 +173,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git 
a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1c.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1c.sh index 76a9f735c5f..511e520465a 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1c.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1c.sh @@ -172,7 +172,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1d.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1d.sh index 8cc1a4e15fa..bd81b7df4eb 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1d.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1d.sh @@ -172,7 +172,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1e.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1e.sh index accfd158a9d..50903e78b6d 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1e.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1e.sh @@ -174,7 +174,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1f.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1f.sh index 2b275e4e27d..f6c53001498 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1f.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1f.sh @@ -173,7 +173,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1g.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1g.sh index 1c90af38c4c..79fd9ef3fb5 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1g.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1g.sh @@ -174,7 +174,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1h.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1h.sh index fb4b6a475e2..e58a7f89e03 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1h.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1h.sh @@ -171,7 +171,7 @@ if [ $stage -le 15 ]; then 
echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh index 92636b4c17e..13f894f5a48 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh @@ -174,7 +174,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh index 89fd8ce2915..48b31832e8c 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh @@ -181,7 +181,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1k.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1k.sh index b8d947d8e92..e675bc494bb 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1k.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1k.sh @@ -177,7 +177,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh index 74c0f5a6ead..2d019398274 100644 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh @@ -224,7 +224,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh index b0e7af0618d..9e5b971bbe2 100644 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh @@ -226,7 +226,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20 dropout-proportion=0.0" diff --git 
a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1n.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1n.sh index bee4d997b01..9575c3cf686 100644 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1n.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1n.sh @@ -178,7 +178,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1o.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1o.sh index 1e4111adc6a..a7f2625c181 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1o.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1o.sh @@ -182,7 +182,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) tdnn_opts="l2-regularize=0.025" lstm_opts="l2-regularize=0.01" output_opts="l2-regularize=0.004" diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_bs_1a.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_bs_1a.sh index b672a44e572..ca920869b30 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_bs_1a.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_bs_1a.sh @@ -180,7 +180,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) tdnn_opts="l2-regularize=0.003" lstm_opts="l2-regularize=0.005" output_opts="l2-regularize=0.001" diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1a.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1a.sh index f68c4203767..53dbd5238db 100644 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1a.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1a.sh @@ -178,7 +178,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) gru_opts="dropout-per-frame=true dropout-proportion=0.0" mkdir -p $dir/configs diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1b.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1b.sh index ac4266ca162..dafef668e60 100644 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1b.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1b.sh @@ -177,7 +177,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) gru_opts="dropout-per-frame=true dropout-proportion=0.0" mkdir -p $dir/configs diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1c.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1c.sh index 74b21f10c33..677946d0b9a 100644 --- 
a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1c.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1c.sh @@ -176,7 +176,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) gru_opts="dropout-per-frame=true dropout-proportion=0.0" mkdir -p $dir/configs diff --git a/egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh b/egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh index 8ff59d83ed0..bd13010c791 100755 --- a/egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh +++ b/egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh @@ -138,7 +138,7 @@ if [ $stage -le 11 ]; then num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" diff --git a/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh b/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh index 201f61dc64b..d6292fbadb3 100755 --- a/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh +++ b/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh @@ -136,7 +136,7 @@ if [ $stage -le 11 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/aspire/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/aspire/s5/local/chain/tuning/run_tdnn_lstm_1a.sh index 63d3a7ca988..e6aa37a7543 100755 --- a/egs/aspire/s5/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/aspire/s5/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -151,7 +151,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=40" diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn.sh index 4f485edf7da..7b4535f8c5e 100755 --- a/egs/babel/s5d/local/chain/tuning/run_tdnn.sh +++ b/egs/babel/s5d/local/chain/tuning/run_tdnn.sh @@ -128,7 +128,7 @@ if [ $stage -le 17 ]; then num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm.sh index 72f7a3c32dd..5fc14dda826 100755 --- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm.sh +++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm.sh @@ -129,7 +129,7 @@ if [ $stage -le 17 ]; then num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | 
python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" label_delay=5 diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab1.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab1.sh index be0c2cc4b9b..8c7de5d18d4 100755 --- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab1.sh +++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab1.sh @@ -127,7 +127,7 @@ if [ $stage -le 17 ]; then num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" label_delay=5 diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab2.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab2.sh index 8f21a239794..0b3e70b5a04 100755 --- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab2.sh +++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab2.sh @@ -127,7 +127,7 @@ if [ $stage -le 17 ]; then num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" label_delay=5 diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab3.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab3.sh index 7898d172242..45f2907645e 100755 --- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab3.sh +++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab3.sh @@ -128,7 +128,7 @@ if [ $stage -le 17 ]; then num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" label_delay=5 diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab4.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab4.sh index 49462573245..0d92aff5c28 100755 --- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab4.sh +++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab4.sh @@ -128,7 +128,7 @@ if [ $stage -le 17 ]; then num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" label_delay=5 diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab5.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab5.sh index c888d985f5e..4129c00dcb4 100755 --- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab5.sh +++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab5.sh @@ -128,7 +128,7 @@ if [ $stage -le 17 ]; then num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" label_delay=5 diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab6.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab6.sh index 
e9a045e113a..1cfa50c1aa1 100755 --- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab6.sh +++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab6.sh @@ -128,7 +128,7 @@ if [ $stage -le 17 ]; then num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" label_delay=5 diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab7.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab7.sh index ce192a91665..ba8ac1e0373 100755 --- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab7.sh +++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab7.sh @@ -129,7 +129,7 @@ if [ $stage -le 17 ]; then num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20 dropout-proportion=0.0" label_delay=5 diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab8.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab8.sh index 3fc0ef2206c..5de285e080e 100755 --- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab8.sh +++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab8.sh @@ -129,7 +129,7 @@ if [ $stage -le 17 ]; then num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20 dropout-proportion=0.0 " label_delay=5 diff --git a/egs/bentham/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/bentham/v1/local/chain/tuning/run_cnn_e2eali_1a.sh index 6bac5a22398..ec530ef1ce4 100755 --- a/egs/bentham/v1/local/chain/tuning/run_cnn_e2eali_1a.sh +++ b/egs/bentham/v1/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -139,7 +139,7 @@ if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.03 dropout-proportion=0.0" tdnn_opts="l2-regularize=0.03" output_opts="l2-regularize=0.04" diff --git a/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_1a.sh b/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_1a.sh index d5ad3629cee..3f8b7c60090 100755 --- a/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_1a.sh @@ -217,7 +217,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.01" output_opts="l2-regularize=0.005" diff --git a/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_lstm_1a.sh index f5c8973ab67..8b4e93cd05b 100755 --- a/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_lstm_1a.sh +++ 
b/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -180,7 +180,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/chime4/s5_1ch/local/chime4_calc_wers_looped.sh b/egs/chime4/s5_1ch/local/chime4_calc_wers_looped.sh index 9fe4a20f43a..84bb2cb8dbd 100755 --- a/egs/chime4/s5_1ch/local/chime4_calc_wers_looped.sh +++ b/egs/chime4/s5_1ch/local/chime4_calc_wers_looped.sh @@ -82,4 +82,4 @@ for e_d in $tasks; do | utils/int2sym.pl -f 2- $graph_dir/words.txt \ | sed s:\::g done -done \ No newline at end of file +done diff --git a/egs/chime4/s5_1ch/local/run_lmrescore_tdnn_lstm.sh b/egs/chime4/s5_1ch/local/run_lmrescore_tdnn_lstm.sh index 7173dcea78b..0bea4dd7102 100755 --- a/egs/chime4/s5_1ch/local/run_lmrescore_tdnn_lstm.sh +++ b/egs/chime4/s5_1ch/local/run_lmrescore_tdnn_lstm.sh @@ -165,4 +165,4 @@ if [ $stage -le 4 ]; then local/chime4_calc_wers_looped.sh $dir ${enhan}_${rnnlm_suffix}_w${rnnweight}_n${nbest} $dir/graph_tgpr_5k \ > $dir/best_wer_looped_${enhan}_${rnnlm_suffix}_w${rnnweight}_n${nbest}.result head -n 15 $dir/best_wer_looped_${enhan}_${rnnlm_suffix}_w${rnnweight}_n${nbest}.result -fi \ No newline at end of file +fi diff --git a/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh index 45a7fd84bd6..5418ecf2b4f 100755 --- a/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh @@ -133,7 +133,7 @@ if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.05" output_opts="l2-regularize=0.01 bottleneck-dim=320" diff --git a/egs/commonvoice/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/commonvoice/s5/local/chain/tuning/run_tdnn_1a.sh index 635e3de1076..d4acd0fed4b 100755 --- a/egs/commonvoice/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/commonvoice/s5/local/chain/tuning/run_tdnn_1a.sh @@ -141,7 +141,7 @@ if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/csj/s5/local/nnet/run_dnn_tandem_uc.sh b/egs/csj/s5/local/nnet/run_dnn_tandem_uc.sh index 4677ff473cb..297aed1f486 100755 --- a/egs/csj/s5/local/nnet/run_dnn_tandem_uc.sh +++ b/egs/csj/s5/local/nnet/run_dnn_tandem_uc.sh @@ -280,4 +280,4 @@ exit 0 %WER 14.88 [ 2557 / 17189, 556 ins, 359 del, 1642 sub ] exp/tandem2uc-tri4/decode_eval3_csj/wer_20_0.5 %WER 17.03 [ 2927 / 17189, 592 ins, 417 del, 1918 sub ] exp/tandem2uc-tri4/decode_eval3_csj.si/wer_20_1.0 %WER 13.44 [ 2311 / 17189, 430 ins, 340 del, 1541 sub ] exp/tandem2uc-tri4_mmi_b0.1/decode_eval3_csj/wer_20_1.0 -EOF \ No newline at end of file +EOF diff --git a/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh b/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh 
index c487f1bd222..7f407552c2e 100755 --- a/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh +++ b/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh @@ -156,7 +156,7 @@ if [ $stage -le 19 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) tdnn_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim-continuous=true" tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" diff --git a/egs/fisher_english/s5/local/chain/run_tdnn.sh b/egs/fisher_english/s5/local/chain/run_tdnn.sh index 14174e617c4..1fd0f1fdf3a 100755 --- a/egs/fisher_english/s5/local/chain/run_tdnn.sh +++ b/egs/fisher_english/s5/local/chain/run_tdnn.sh @@ -112,7 +112,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_100k_semisupervised_1a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_100k_semisupervised_1a.sh index e95de232304..b76efc4f1de 100644 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_100k_semisupervised_1a.sh +++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_100k_semisupervised_1a.sh @@ -231,7 +231,7 @@ if [ $stage -le 11 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $sup_tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_1a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_1a.sh index e76df666e8a..b1c133942ef 100755 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_1a.sh +++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_1a.sh @@ -142,7 +142,7 @@ if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_50k_semisupervised_1a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_50k_semisupervised_1a.sh index 2d5b2f8480e..53aac8c08ea 100755 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_50k_semisupervised_1a.sh +++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_50k_semisupervised_1a.sh @@ -250,7 +250,7 @@ if [ $stage -le 11 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $sup_tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) 
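
The recurring one-line change in all of these hunks is the same: the argument of the inline print is wrapped in parentheses, which keeps the learning-rate computation valid whether `python` resolves to Python 2 (where the parentheses simply group the expression in a print statement) or Python 3 (where print is a function and the unparenthesized form is a SyntaxError). A minimal illustration, not part of the patch, assuming `python2`/`python3` binaries on the PATH and a representative xent_regularize=0.1:

  xent_regularize=0.1
  # Old form: a valid Python 2 print statement, but a SyntaxError under Python 3.
  echo "print 0.5/$xent_regularize" | python2    # prints 5.0
  echo "print 0.5/$xent_regularize" | python3    # fails with a SyntaxError
  # New form: accepted by both interpreters, so it prints 5.0 regardless of
  # which major version the unversioned 'python' points at.
  echo "print (0.5/$xent_regularize)" | python
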
mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/fisher_swbd/s5/local/chain/run_blstm_6j.sh b/egs/fisher_swbd/s5/local/chain/run_blstm_6j.sh index cbf0ef6cb6c..c12f604f26b 100755 --- a/egs/fisher_swbd/s5/local/chain/run_blstm_6j.sh +++ b/egs/fisher_swbd/s5/local/chain/run_blstm_6j.sh @@ -133,7 +133,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_7c.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_7c.sh index 12b3187a5fa..efcd1eced4a 100644 --- a/egs/fisher_swbd/s5/local/chain/run_tdnn_7c.sh +++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_7c.sh @@ -129,7 +129,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_7d.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_7d.sh index 7d640c3262a..e4a555abfdd 100644 --- a/egs/fisher_swbd/s5/local/chain/run_tdnn_7d.sh +++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_7d.sh @@ -134,7 +134,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.002" linear_opts="orthonormal-constraint=1.0" output_opts="l2-regularize=0.0005 bottleneck-dim=256" diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a.sh index 07e88b59ddc..5650cedca28 100755 --- a/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a.sh +++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a.sh @@ -142,7 +142,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" mkdir -p $dir/configs diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1b.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1b.sh index c9d50d1f7bd..f3cc869e6de 100755 --- a/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1b.sh +++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1b.sh @@ -151,7 +151,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20 dropout-proportion=0.0" mkdir -p $dir/configs diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1a.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1a.sh index 1cce08abeee..059a81e15fc 100755 --- a/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1a.sh +++ 
b/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1a.sh @@ -148,7 +148,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) gru_opts="dropout-per-frame=true dropout-proportion=0.0 " mkdir -p $dir/configs diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1b.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1b.sh index 2334c6a1bc1..d86b699d6f6 100755 --- a/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1b.sh +++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1b.sh @@ -149,7 +149,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) gru_opts="dropout-per-frame=true dropout-proportion=0.0 " mkdir -p $dir/configs diff --git a/egs/formosa/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/formosa/s5/local/chain/tuning/run_tdnn_1a.sh index d52644a66d1..66c5ad3335f 100755 --- a/egs/formosa/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/formosa/s5/local/chain/tuning/run_tdnn_1a.sh @@ -90,7 +90,7 @@ if [ $stage -le 10 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/formosa/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/formosa/s5/local/chain/tuning/run_tdnn_1b.sh index 0134e63bce2..1981bb0530d 100755 --- a/egs/formosa/s5/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/formosa/s5/local/chain/tuning/run_tdnn_1b.sh @@ -98,7 +98,7 @@ if [ $stage -le 10 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/formosa/s5/local/chain/tuning/run_tdnn_1c.sh b/egs/formosa/s5/local/chain/tuning/run_tdnn_1c.sh index 36ea128fdde..6fa10344cfc 100755 --- a/egs/formosa/s5/local/chain/tuning/run_tdnn_1c.sh +++ b/egs/formosa/s5/local/chain/tuning/run_tdnn_1c.sh @@ -97,7 +97,7 @@ if [ $stage -le 10 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true" tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" diff --git a/egs/formosa/s5/local/chain/tuning/run_tdnn_1d.sh b/egs/formosa/s5/local/chain/tuning/run_tdnn_1d.sh index be21f2402a9..1f4b7e12850 100755 --- a/egs/formosa/s5/local/chain/tuning/run_tdnn_1d.sh +++ b/egs/formosa/s5/local/chain/tuning/run_tdnn_1d.sh @@ -96,7 +96,7 @@ if [ $stage -le 10 ]; then echo "$0: 
creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true" tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" diff --git a/egs/gale_arabic/s5/local/gale_format_data.sh b/egs/gale_arabic/s5/local/gale_format_data.sh index 85a946a58d9..053323dc194 100755 --- a/egs/gale_arabic/s5/local/gale_format_data.sh +++ b/egs/gale_arabic/s5/local/gale_format_data.sh @@ -57,4 +57,4 @@ fsttablecompose data/lang/L_disambig.fst data/lang_test/G.fst | \ echo gale_format_data succeeded. -exit 0 \ No newline at end of file +exit 0 diff --git a/egs/gale_arabic/s5/local/gale_train_lms.sh b/egs/gale_arabic/s5/local/gale_train_lms.sh index 1b5d4665a19..8f8e715390f 100755 --- a/egs/gale_arabic/s5/local/gale_train_lms.sh +++ b/egs/gale_arabic/s5/local/gale_train_lms.sh @@ -113,4 +113,4 @@ fi echo train lm succeeded -exit 0 \ No newline at end of file +exit 0 diff --git a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh index a3ccfda04ac..bf2e45c9914 100755 --- a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh @@ -108,7 +108,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true" tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" diff --git a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh index 604f32a1de4..deebafc95e4 100755 --- a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -120,7 +120,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/gp/s1/local/gp_convert_audio.sh b/egs/gp/s1/local/gp_convert_audio.sh index a7c2d7285c4..b3db909c9b6 100755 --- a/egs/gp/s1/local/gp_convert_audio.sh +++ b/egs/gp/s1/local/gp_convert_audio.sh @@ -108,4 +108,4 @@ done < "$INLIST" echo "sox: error converting following $nsoxerr file(s):" >&2 [ -f "$soxerr" ] && cat "$soxerr" >&2 -exit 0; \ No newline at end of file +exit 0; diff --git a/egs/gp/s1/utils/mkgraph.sh b/egs/gp/s1/utils/mkgraph.sh index 2e45296593b..3aba742832d 100755 --- a/egs/gp/s1/utils/mkgraph.sh +++ b/egs/gp/s1/utils/mkgraph.sh @@ -131,4 +131,4 @@ cp $lang/silphones.csl $dir/ # to make const fst: # fstconvert --fst_type=const $dir/HCLG.fst $dir/HCLG_c.fst -echo "Finished making decoding graphs in $dir" \ No newline at end of file +echo 
"Finished making decoding graphs in $dir" diff --git a/egs/heroico/s5/local/chain/tuning/run_cnn_tdnn_1a.sh b/egs/heroico/s5/local/chain/tuning/run_cnn_tdnn_1a.sh index 1112f0ec08b..361879b4142 100755 --- a/egs/heroico/s5/local/chain/tuning/run_cnn_tdnn_1a.sh +++ b/egs/heroico/s5/local/chain/tuning/run_cnn_tdnn_1a.sh @@ -149,7 +149,7 @@ if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.03" ivector_layer_opts="l2-regularize=0.03" diff --git a/egs/heroico/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/heroico/s5/local/chain/tuning/run_tdnn_1a.sh index 6dde42bef79..290bd4c7970 100755 --- a/egs/heroico/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/heroico/s5/local/chain/tuning/run_tdnn_1a.sh @@ -150,7 +150,7 @@ if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.01" output_opts="l2-regularize=0.0025" diff --git a/egs/heroico/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/heroico/s5/local/chain/tuning/run_tdnn_1b.sh index d255d85327f..cfb4dc1f697 100755 --- a/egs/heroico/s5/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/heroico/s5/local/chain/tuning/run_tdnn_1b.sh @@ -151,7 +151,7 @@ if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) affine_opts="l2-regularize=0.03 dropout-proportion=0.0 dropout-per-dim-continuous=true" tdnnf_opts="l2-regularize=0.03 dropout-proportion=0.0 bypass-scale=0.66" linear_opts="l2-regularize=0.03 orthonormal-constraint=-1.0" diff --git a/egs/hkust/s5/local/chain/tuning/run_tdnn_2a.sh b/egs/hkust/s5/local/chain/tuning/run_tdnn_2a.sh index f771387785c..c62b776de2b 100755 --- a/egs/hkust/s5/local/chain/tuning/run_tdnn_2a.sh +++ b/egs/hkust/s5/local/chain/tuning/run_tdnn_2a.sh @@ -109,7 +109,7 @@ if [ $stage -le 12 ]; then ivector_dim=$(feat-to-dim scp:exp/nnet3/ivectors_${train_set}/ivector_online.scp -) feat_dim=$(feat-to-dim scp:data/${train_set}_hires/feats.scp -) num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.004 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true" linear_opts="orthonormal-constraint=-1.0 l2-regularize=0.004" output_opts="l2-regularize=0.002" diff --git a/egs/hub4_spanish/s5/local/chain/tuning/run_cnn_tdnn_1a.sh b/egs/hub4_spanish/s5/local/chain/tuning/run_cnn_tdnn_1a.sh index 81915fec5a6..d1b657a2d74 100755 --- a/egs/hub4_spanish/s5/local/chain/tuning/run_cnn_tdnn_1a.sh +++ b/egs/hub4_spanish/s5/local/chain/tuning/run_cnn_tdnn_1a.sh @@ -147,7 +147,7 @@ if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 
0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.03" ivector_affine_opts="l2-regularize=0.03" diff --git a/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1a.sh index 23a55f93023..40bbbe1ae79 100755 --- a/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1a.sh @@ -136,7 +136,7 @@ if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1b.sh index 724bb1e0794..a498d8157f3 100755 --- a/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1b.sh @@ -147,7 +147,7 @@ if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) tdnn_opts="l2-regularize=0.03 dropout-proportion=0.0 dropout-per-dim-continuous=true" tdnnf_opts="l2-regularize=0.03 dropout-proportion=0.0 bypass-scale=0.66" diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_1a.sh b/egs/iam/v1/local/chain/tuning/run_cnn_1a.sh index 1253bbe5aa3..ef1273f3961 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_1a.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_1a.sh @@ -128,7 +128,7 @@ if [ $stage -le 4 ]; then mkdir -p $dir echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) common1="height-offsets=-2,-1,0,1,2 num-filters-out=36" common2="height-offsets=-2,-1,0,1,2 num-filters-out=70" mkdir -p $dir/configs diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1a.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1a.sh index a8d7f6c6091..bbcc55aa2b0 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1a.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1a.sh @@ -125,7 +125,7 @@ if [ $stage -le 4 ]; then mkdir -p $dir echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) common1="height-offsets=-2,-1,0,1,2 num-filters-out=36" common2="height-offsets=-2,-1,0,1,2 num-filters-out=70" common3="height-offsets=-1,0,1 num-filters-out=70" diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh index f5dbb93e7b7..401ffa14e19 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh @@ -124,7 +124,7 @@ if [ $stage -le 4 ]; then mkdir -p $dir echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | 
awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) common1="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" common2="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" common3="required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh index 1dd83c5078f..17209b9204f 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh @@ -122,7 +122,7 @@ if [ $stage -le 4 ]; then mkdir -p $dir echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.075" tdnn_opts="l2-regularize=0.075" output_opts="l2-regularize=0.1" diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1d.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1d.sh index 3979b3d2da0..89a40ed2a13 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1d.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1d.sh @@ -127,7 +127,7 @@ if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.075" tdnn_opts="l2-regularize=0.075" output_opts="l2-regularize=0.1" diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1a.sh index f95f6a90ca1..703d404159a 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1a.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -121,7 +121,7 @@ if [ $stage -le 4 ]; then mkdir -p $dir echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.075" tdnn_opts="l2-regularize=0.075" output_opts="l2-regularize=0.1" diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh index 81700ce2180..905c4661477 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh @@ -117,7 +117,7 @@ if [ $stage -le 4 ]; then mkdir -p $dir echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.075" tdnn_opts="l2-regularize=0.075" output_opts="l2-regularize=0.1" diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh index 047d673db17..26b1aca0929 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh @@ -119,7 +119,7 @@ if [ $stage -le 4 ]; then mkdir -p $dir echo "$0: 
creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.03 dropout-proportion=0.0" tdnn_opts="l2-regularize=0.03" output_opts="l2-regularize=0.04" diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh index a80bb02290b..9a01688ba35 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -135,7 +135,7 @@ if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.075" tdnn_opts="l2-regularize=0.075" output_opts="l2-regularize=0.1" diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh index 6615c4669d6..28aa246f334 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh @@ -137,7 +137,7 @@ if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.075" tdnn_opts="l2-regularize=0.075" output_opts="l2-regularize=0.1" diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh index f44c073635e..f158317950a 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh @@ -139,7 +139,7 @@ if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.03 dropout-proportion=0.0" tdnn_opts="l2-regularize=0.03" output_opts="l2-regularize=0.04" diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh index e7d9246fb89..1c44057454a 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh @@ -137,7 +137,7 @@ if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.03 dropout-proportion=0.0" tdnn_opts="l2-regularize=0.03" output_opts="l2-regularize=0.04" diff --git a/egs/iban/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/iban/s5/local/chain/tuning/run_tdnn_1a.sh index d320f49d3aa..10650a18269 100755 --- a/egs/iban/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/iban/s5/local/chain/tuning/run_tdnn_1a.sh @@ -136,7 +136,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; 
num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.08 dropout-per-dim-continuous=true" output_opts="l2-regularize=0.02 bottleneck-dim=256" diff --git a/egs/iban/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/iban/s5/local/chain/tuning/run_tdnn_1b.sh index 56f5255288c..db62e6f8a55 100755 --- a/egs/iban/s5/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/iban/s5/local/chain/tuning/run_tdnn_1b.sh @@ -136,7 +136,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.08 dropout-per-dim=true dropout-per-dim-continuous=true" linear_opts="orthonormal-constraint=-1.0" output_opts="l2-regularize=0.04" diff --git a/egs/ifnenit/v1/local/chain/run_cnn_1a.sh b/egs/ifnenit/v1/local/chain/run_cnn_1a.sh index b0e147d157b..b0ecd547741 100755 --- a/egs/ifnenit/v1/local/chain/run_cnn_1a.sh +++ b/egs/ifnenit/v1/local/chain/run_cnn_1a.sh @@ -123,7 +123,7 @@ if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) common1="required-time-offsets=0 height-offsets=-2,-1,0,1,2 num-filters-out=36" common2="required-time-offsets=0 height-offsets=-2,-1,0,1,2 num-filters-out=70" mkdir -p $dir/configs diff --git a/egs/ifnenit/v1/local/chain/run_cnn_chainali_1a.sh b/egs/ifnenit/v1/local/chain/run_cnn_chainali_1a.sh index b1f33b41a0c..7f3132d657e 100755 --- a/egs/ifnenit/v1/local/chain/run_cnn_chainali_1a.sh +++ b/egs/ifnenit/v1/local/chain/run_cnn_chainali_1a.sh @@ -128,7 +128,7 @@ if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) common1="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" common2="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" common3="required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" diff --git a/egs/librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh b/egs/librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh index 2a60587fc35..8ebca6fd650 100755 --- a/egs/librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh +++ b/egs/librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh @@ -112,7 +112,7 @@ if [ $stage -le 14 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.01" ivector_affine_opts="l2-regularize=0.0" affine_opts="l2-regularize=0.008 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true" diff --git a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1b.sh index 
7129827fe19..57f50df761d 100755 --- a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1b.sh @@ -122,7 +122,7 @@ if [ $stage -le 14 ]; then # create the config files for nnet initialization num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1c.sh b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1c.sh index 29ebe62ddde..3970fa8c4d9 100755 --- a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1c.sh +++ b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1c.sh @@ -112,7 +112,7 @@ if [ $stage -le 14 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.002" linear_opts="orthonormal-constraint=1.0" output_opts="l2-regularize=0.0005 bottleneck-dim=256" diff --git a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1d.sh b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1d.sh index 81b621ef86f..5c488362e59 100755 --- a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1d.sh +++ b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1d.sh @@ -207,7 +207,7 @@ if [ $stage -le 14 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) affine_opts="l2-regularize=0.008 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true" tdnnf_opts="l2-regularize=0.008 dropout-proportion=0.0 bypass-scale=0.75" linear_opts="l2-regularize=0.008 orthonormal-constraint=-1.0" diff --git a/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh index 812bf5e7fc5..4277f769119 100755 --- a/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -85,7 +85,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.002" linear_opts="orthonormal-constraint=1.0" diff --git a/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1b.sh b/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1b.sh index d9f20fae011..383cc533270 100755 --- a/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1b.sh +++ b/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1b.sh @@ -120,7 +120,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.002" linear_opts="orthonormal-constraint=1.0" diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh 
b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh index d449805be1d..892ee441516 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh +++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh @@ -115,7 +115,7 @@ if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) common1="height-offsets=-2,-1,0,1,2 num-filters-out=36" common2="height-offsets=-2,-1,0,1,2 num-filters-out=70" mkdir -p $dir/configs diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh index 23c4d5c2036..7ca7c652fd2 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh @@ -112,7 +112,7 @@ if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) common1="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" common2="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" common3="required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh index ee84ea0d83f..a8bc1836ffe 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -116,7 +116,7 @@ if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) common1="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" common2="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" common3="required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh index c6052b76e7f..0828e051dcc 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh +++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh @@ -129,7 +129,7 @@ if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) common1="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" common2="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" common3="required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" diff --git a/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh b/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh index e0cca104f50..ccbb7119674 100755 --- a/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh +++ b/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh @@ -124,7 +124,7 @@ if [ $stage -le 4 ]; then echo "$0: creating 
neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.075" tdnn_opts="l2-regularize=0.075" output_opts="l2-regularize=0.1" diff --git a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_1a.sh b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_1a.sh index d17b3e3c9c5..164d62a7ad9 100755 --- a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_1a.sh +++ b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_1a.sh @@ -122,7 +122,7 @@ if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) common1="height-offsets=-2,-1,0,1,2 num-filters-out=36" common2="height-offsets=-2,-1,0,1,2 num-filters-out=70" mkdir -p $dir/configs diff --git a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1a.sh b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1a.sh index d53949dd3de..be51bdcc3d1 100755 --- a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1a.sh +++ b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1a.sh @@ -119,7 +119,7 @@ if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) common1="height-offsets=-2,-1,0,1,2 num-filters-out=36" common2="height-offsets=-2,-1,0,1,2 num-filters-out=70" common3="height-offsets=-1,0,1 num-filters-out=70" diff --git a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1b.sh b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1b.sh index 5a3b85422f6..aa61620a92f 100755 --- a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1b.sh +++ b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1b.sh @@ -123,7 +123,7 @@ if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) common1="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" common2="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" common3="required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh index 0b86ace2de1..c8f2503b578 100755 --- a/egs/mini_librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh +++ b/egs/mini_librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh @@ -144,7 +144,7 @@ if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.03" ivector_affine_opts="l2-regularize=0.03" diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1a.sh 
b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1a.sh index 642c20ec191..da16297c9dd 100755 --- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1a.sh @@ -147,7 +147,7 @@ if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1b.sh index 110b7b87415..3d0c2d63902 100755 --- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1b.sh @@ -154,7 +154,7 @@ if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1c.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1c.sh index fe6f1b50f9e..081af8fe2f8 100755 --- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1c.sh +++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1c.sh @@ -150,7 +150,7 @@ if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1d.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1d.sh index 225b36f909c..04df38d4da3 100755 --- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1d.sh +++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1d.sh @@ -150,7 +150,7 @@ if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1e.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1e.sh index 565387003ff..cdf9bb584f4 100755 --- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1e.sh +++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1e.sh @@ -148,7 +148,7 @@ if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.05" output_opts="l2-regularize=0.01" diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1f.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1f.sh index 9cc6d93022a..d1385ff2be5 100755 --- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1f.sh 
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1f.sh @@ -156,7 +156,7 @@ if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.05" output_opts="l2-regularize=0.02 bottleneck-dim=192" diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g.sh index e234b847aa7..ad51780e191 100755 --- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g.sh +++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g.sh @@ -155,7 +155,7 @@ if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.05 dropout-per-dim-continuous=true" output_opts="l2-regularize=0.02 bottleneck-dim=192" diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g20.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g20.sh index 18540806028..dbfe5c5a07a 100755 --- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g20.sh +++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g20.sh @@ -168,7 +168,7 @@ if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.05 dropout-per-dim-continuous=true" output_opts="l2-regularize=0.02 bottleneck-dim=192" diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1h.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1h.sh index 776247f5ea3..cc4123e2755 100755 --- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1h.sh +++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1h.sh @@ -151,7 +151,7 @@ if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) tdnn_opts="l2-regularize=0.03 dropout-proportion=0.0 dropout-per-dim-continuous=true" tdnnf_opts="l2-regularize=0.03 dropout-proportion=0.0 bypass-scale=0.66" diff --git a/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1a.sh b/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1a.sh index de858973c98..c2f90df4b5c 100755 --- a/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1a.sh +++ b/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1a.sh @@ -99,7 +99,7 @@ if [ $stage -le 10 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $ali_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1b.sh b/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1b.sh 
index ba4ecc268df..2b3c2844972 100755 --- a/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1b.sh +++ b/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1b.sh @@ -102,7 +102,7 @@ if [ $stage -le 10 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $ali_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20 delay=-3 dropout-proportion=0.0" mkdir -p $dir/configs diff --git a/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1c.sh b/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1c.sh index 74df56b0537..5118cb0f8bd 100755 --- a/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1c.sh +++ b/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1c.sh @@ -100,7 +100,7 @@ if [ $stage -le 10 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $ali_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) tdnn_opts="l2-regularize=0.05" lstm_opts="l2-regularize=0.01 decay-time=20 delay=-3 dropout-proportion=0.0" output_opts="l2-regularize=0.01" diff --git a/egs/multi_en/s5/local/chain/tuning/run_tdnn_5b.sh b/egs/multi_en/s5/local/chain/tuning/run_tdnn_5b.sh index 9f8c49387b1..96f5fdac8f3 100755 --- a/egs/multi_en/s5/local/chain/tuning/run_tdnn_5b.sh +++ b/egs/multi_en/s5/local/chain/tuning/run_tdnn_5b.sh @@ -132,7 +132,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.0015 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true" linear_opts="l2-regularize=0.0015 orthonormal-constraint=-1.0" output_opts="l2-regularize=0.001" diff --git a/egs/multi_en/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/multi_en/s5/local/chain/tuning/run_tdnn_lstm_1a.sh index 5793fef0fc2..62266334962 100755 --- a/egs/multi_en/s5/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/multi_en/s5/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -155,7 +155,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="dropout-proportion=0.0 decay-time=40" relu_dim=1024 diff --git a/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1a.sh b/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1a.sh index 98e7c2ed6c1..79cd3eb3014 100755 --- a/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1a.sh +++ b/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1a.sh @@ -150,7 +150,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) gru_opts="dropout-per-frame=true dropout-proportion=0.0 " mkdir -p $dir/configs diff --git 
a/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1b.sh b/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1b.sh index 8b1f34b15a6..a7170af9431 100755 --- a/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1b.sh +++ b/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1b.sh @@ -146,7 +146,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) gru_opts="dropout-per-frame=true dropout-proportion=0.0 " mkdir -p $dir/configs diff --git a/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh index 61cc8b97d41..c8b4997161e 100755 --- a/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh @@ -133,7 +133,7 @@ if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.05" output_opts="l2-regularize=0.01 bottleneck-dim=320" diff --git a/egs/reverb/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/reverb/s5/local/chain/tuning/run_tdnn_lstm_1a.sh index 9369e00a7ba..4723400c76b 100755 --- a/egs/reverb/s5/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/reverb/s5/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -141,7 +141,7 @@ if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=40" diff --git a/egs/rimes/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/rimes/v1/local/chain/tuning/run_cnn_e2eali_1a.sh index 4eb3e5e1e76..33eb9dcb98c 100755 --- a/egs/rimes/v1/local/chain/tuning/run_cnn_e2eali_1a.sh +++ b/egs/rimes/v1/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -151,7 +151,7 @@ if [ $stage -le 5 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.03 dropout-proportion=0.0" tdnn_opts="l2-regularize=0.03" output_opts="l2-regularize=0.04" diff --git a/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1a.sh b/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1a.sh index 6b6c08e779a..2fd2556c19b 100755 --- a/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1a.sh +++ b/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1a.sh @@ -130,7 +130,7 @@ if [ $stage -le 7 ]; then echo " generating new layers, that are specific to rm. 
These layers "; echo " are added to the transferred part of the wsj network."; num_targets=$(tree-info --print-args=false $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/rm/s5/local/run_vtln2.sh b/egs/rm/s5/local/run_vtln2.sh index 6437032ca61..b87030d2e3d 100755 --- a/egs/rm/s5/local/run_vtln2.sh +++ b/egs/rm/s5/local/run_vtln2.sh @@ -59,4 +59,4 @@ steps/compute_cmvn_stats.sh data/test_vtln exp/make_mfcc/test_vtln $featdir # %WER 3.13 [ 392 / 12533, 59 ins, 64 del, 269 sub ] exp/tri3b/decode.si/wer_3 # %WER 10.36 [ 1298 / 12533, 147 ins, 192 del, 959 sub ] exp/tri3b/decode_ug/wer_12 # %WER 13.48 [ 1689 / 12533, 159 ins, 277 del, 1253 sub ] exp/tri3b/decode_ug.si/wer_13 -# a04:s5: \ No newline at end of file +# a04:s5: diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1a.sh b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1a.sh index ec6b8941955..47557f93696 100755 --- a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1a.sh +++ b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1a.sh @@ -152,7 +152,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1b.sh b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1b.sh index 53aa92710e8..7afa1b7f902 100755 --- a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1b.sh +++ b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1b.sh @@ -153,7 +153,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1c.sh b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1c.sh index 83c2f3607f0..e69e499e152 100755 --- a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1c.sh +++ b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1c.sh @@ -151,7 +151,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1d.sh b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1d.sh index 2665ea91ff8..86e0352828c 100755 --- a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1d.sh +++ b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1d.sh @@ -164,7 +164,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs 
cat < $dir/configs/network.xconfig diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1e.sh b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1e.sh index 80f67d34ba9..313f899a471 100755 --- a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1e.sh +++ b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1e.sh @@ -152,7 +152,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1b.sh index e242660a10e..600f27ddf86 100755 --- a/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1b.sh @@ -135,7 +135,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_lstm_1a.sh index 86dc4b75a24..cedc448464a 100755 --- a/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -145,7 +145,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/sprakbanken/s5/local/norm_dk/write_punct.sh b/egs/sprakbanken/s5/local/norm_dk/write_punct.sh index 57726bd44cb..3b8decaf376 100755 --- a/egs/sprakbanken/s5/local/norm_dk/write_punct.sh +++ b/egs/sprakbanken/s5/local/norm_dk/write_punct.sh @@ -22,4 +22,4 @@ perl -pe 's/([\n ])\;([ \n])/\1SEMIKOLON\2/g' | \ perl -pe 's/([\n ])_NL_([ \n])/\1NY LINJE\2/g' | \ perl -pe 's/([\n ])_NS_([ \n])/\1NYT AFSNIT\2/g' | \ -tr -s ' ' \ No newline at end of file +tr -s ' ' diff --git a/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7k.sh b/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7k.sh index 6792332da56..20dcab8eb50 100755 --- a/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7k.sh +++ b/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7k.sh @@ -152,7 +152,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6j.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6j.sh index ae7c97e7d08..acdae844b65 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_blstm_6j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6j.sh @@ -120,7 +120,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; 
num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6k.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6k.sh index 90d672b9ae9..bbd8cb63697 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_blstm_6k.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6k.sh @@ -116,7 +116,7 @@ if [ $stage -le 12 ]; then num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6l.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6l.sh index 68daf81ab01..16f2ea211d0 100644 --- a/egs/swbd/s5c/local/chain/tuning/run_blstm_6l.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6l.sh @@ -125,7 +125,7 @@ if [ $stage -le 12 ]; then num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20 dropout-proportion=0.0" diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6m.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6m.sh index 4668aac9ebc..09f7d72434c 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_blstm_6m.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6m.sh @@ -124,7 +124,7 @@ if [ $stage -le 12 ]; then num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6n.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6n.sh index 22316d56ed2..8e44d0bc114 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_blstm_6n.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6n.sh @@ -123,7 +123,7 @@ if [ $stage -le 12 ]; then num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6o.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6o.sh index ad2ac4bf043..6a836e81b09 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_blstm_6o.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6o.sh @@ -125,7 +125,7 @@ if [ $stage -le 12 ]; then num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" diff --git a/egs/swbd/s5c/local/chain/tuning/run_cnn_tdnn_1a.sh b/egs/swbd/s5c/local/chain/tuning/run_cnn_tdnn_1a.sh index 174925315a0..d1a61360f85 100755 --- 
a/egs/swbd/s5c/local/chain/tuning/run_cnn_tdnn_1a.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_cnn_tdnn_1a.sh @@ -112,7 +112,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.01" ivector_affine_opts="l2-regularize=0.01" diff --git a/egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh b/egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh index e432435a551..48db81f586f 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh @@ -119,7 +119,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/swbd/s5c/local/chain/tuning/run_lstm_6k.sh b/egs/swbd/s5c/local/chain/tuning/run_lstm_6k.sh index b9b7152dcbe..021eab09506 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_lstm_6k.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_lstm_6k.sh @@ -121,7 +121,7 @@ if [ $stage -le 12 ]; then num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" diff --git a/egs/swbd/s5c/local/chain/tuning/run_lstm_6l.sh b/egs/swbd/s5c/local/chain/tuning/run_lstm_6l.sh index 12564c4faae..f219167f9ec 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_lstm_6l.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_lstm_6l.sh @@ -131,7 +131,7 @@ if [ $stage -le 12 ]; then num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh index fa6518a9ad9..0623d26a9e4 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh @@ -117,7 +117,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7h.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7h.sh index 9dfaa1d4509..dbbe3c1e6fd 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7h.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7h.sh @@ -120,7 +120,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print 
(0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7i.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7i.sh index c5b5633d94c..2a8a658bf6b 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7i.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7i.sh @@ -113,7 +113,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh index 793b40f7fe3..a9eba36ddaa 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh @@ -112,7 +112,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7k.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7k.sh index bd47ed61f23..8e0b290cf87 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7k.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7k.sh @@ -114,7 +114,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7l.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7l.sh index f7681a743e1..bb9ddf209d6 100644 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7l.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7l.sh @@ -112,7 +112,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7m.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7m.sh index 03b1ee3c97f..97f92c14f1f 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7m.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7m.sh @@ -122,7 +122,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7m25l.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7m25l.sh index 0fa7353edb2..d9fe106e5d7 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7m25l.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7m25l.sh @@ -452,7 +452,7 @@ if [ $stage 
-le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.002 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true" linear_opts="orthonormal-constraint=1.0" output_opts="l2-regularize=0.0005" diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7n.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7n.sh index cf4855db611..99e43443f99 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7n.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7n.sh @@ -119,7 +119,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.002" linear_opts="orthonormal-constraint=1.0" output_opts="l2-regularize=0.0005 bottleneck-dim=256" diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7o.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7o.sh index fb47b1e88ad..44ca3b3d279 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7o.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7o.sh @@ -126,7 +126,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.004 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true" linear_opts="orthonormal-constraint=-1.0 l2-regularize=0.004" output_opts="l2-regularize=0.002" diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7p.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7p.sh index 096ed9c54fd..d19a4ef4c0b 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7p.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7p.sh @@ -114,7 +114,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.004 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true" linear_opts="orthonormal-constraint=-1.0 l2-regularize=0.004" output_opts="l2-regularize=0.002" diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7q.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7q.sh index 8eab54a9dc2..cea0891d5d7 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7q.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7q.sh @@ -118,7 +118,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true" tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" diff --git 
a/egs/swbd/s5c/local/chain/tuning/run_tdnn_attention_1a.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_attention_1a.sh index 3ce4fa68397..d4febd61e94 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_attention_1a.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_attention_1a.sh @@ -122,7 +122,7 @@ fi if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1a.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1a.sh index 7854bac44c5..4414147bf0e 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1a.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1a.sh @@ -120,7 +120,7 @@ if [ $stage -le 12 ]; then num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1b.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1b.sh index 3929cdc432e..cd9d4dc6f2b 100644 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1b.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1b.sh @@ -122,7 +122,7 @@ if [ $stage -le 12 ]; then num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20 dropout-proportion=0.0" diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1c.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1c.sh index 311fe15d895..18b660b4080 100644 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1c.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1c.sh @@ -119,7 +119,7 @@ if [ $stage -le 12 ]; then num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1d.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1d.sh index 4894e492542..be615e0e361 100644 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1d.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1d.sh @@ -112,7 +112,7 @@ if [ $stage -le 12 ]; then num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20 dropout-proportion=0.0" diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh index 32234ff009c..43855e6f7ce 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -118,7 +118,7 @@ if [ $stage 
-le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1b.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1b.sh index 1d305186fc2..5c82ed0eb11 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1b.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1b.sh @@ -114,7 +114,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh index e2492ee277b..c3df0bf2b2c 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh @@ -121,7 +121,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1d.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1d.sh index 2028e20ff00..3d353387239 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1d.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1d.sh @@ -140,7 +140,7 @@ if [ $stage -le 12 ]; then num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh index bf3eddb90ae..2a2d508ecdd 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh @@ -135,7 +135,7 @@ if [ $stage -le 12 ]; then num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh index e500ee0a9a8..5af5463b372 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh @@ -152,7 +152,7 @@ if [ $stage -le 12 ]; then num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" diff --git 
a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1g.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1g.sh index 9b3a5d29957..28105a587ec 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1g.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1g.sh @@ -134,7 +134,7 @@ if [ $stage -le 12 ]; then num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=15" diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1h.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1h.sh index ca578195323..d6e81f2d8eb 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1h.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1h.sh @@ -131,7 +131,7 @@ if [ $stage -le 12 ]; then num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1i.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1i.sh index a0848cc8894..060d98c9d05 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1i.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1i.sh @@ -152,7 +152,7 @@ if [ $stage -le 12 ]; then num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1j.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1j.sh index 84258624447..9bd39a262c5 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1j.sh @@ -119,7 +119,7 @@ if [ $stage -le 12 ]; then num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1k.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1k.sh index 258f067cf2b..ccd6138da6e 100644 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1k.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1k.sh @@ -129,7 +129,7 @@ if [ $stage -le 12 ]; then num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=20" diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1l.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1l.sh index 0a518572201..f702033377a 100644 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1l.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1l.sh @@ -120,7 +120,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info 
$treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1m.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1m.sh index 3a2b34792f3..b43577bd76c 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1m.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1m.sh @@ -128,7 +128,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) lstm_opts="decay-time=40" diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1n.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1n.sh index 34fcf731639..5bb6e7da152 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1n.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1n.sh @@ -125,7 +125,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.002" linear_opts="orthonormal-constraint=1.0" diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_opgru_1a.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_opgru_1a.sh index 18d3f81ffde..4db38d74508 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_opgru_1a.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_opgru_1a.sh @@ -134,7 +134,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) gru_opts="dropout-per-frame=true dropout-proportion=0.0" mkdir -p $dir/configs diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_opgru_1b.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_opgru_1b.sh index 579008b5658..7e9dec67068 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_opgru_1b.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_opgru_1b.sh @@ -132,7 +132,7 @@ if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) gru_opts="dropout-per-frame=true dropout-proportion=0.0 gru-nonlinearity-options=\"max-change=0.75\"" mkdir -p $dir/configs diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_blstm_1a.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_blstm_1a.sh index 5e60ee1178c..2ac8c09dad1 100644 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_blstm_1a.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_blstm_1a.sh @@ -139,7 +139,7 @@ if [ $stage -le 17 ]; then lstm_opts="decay-time=20" num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig 
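The one-line change repeated throughout these hunks is a Python 3 compatibility fix: in Python 2, print is a statement, so the string "print 0.5/$xent_regularize" piped into python is valid, but when the python on PATH is Python 3, print is a function and the unparenthesized form fails with a SyntaxError. Wrapping the expression in parentheses makes the same piped string acceptable to either interpreter. A minimal sketch of the behaviour, not part of the patch itself, using an illustrative value xent_regularize=0.1 (a common default in these chain recipes):

    xent_regularize=0.1                      # illustrative value, not taken from the patch
    # Old form: accepted only by Python 2; Python 3 rejects it with a SyntaxError.
    #   echo "print 0.5/$xent_regularize" | python
    # New form: print is invoked as a function call, accepted by Python 2 and Python 3.
    learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python)
    echo "$learning_rate_factor"             # prints 5.0 with either interpreter
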
diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1a.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1a.sh index ec6b8941955..47557f93696 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1a.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1a.sh @@ -152,7 +152,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1b.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1b.sh index 53aa92710e8..7afa1b7f902 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1b.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1b.sh @@ -153,7 +153,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1c.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1c.sh index 83c2f3607f0..e69e499e152 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1c.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1c.sh @@ -151,7 +151,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1d.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1d.sh index 2665ea91ff8..86e0352828c 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1d.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1d.sh @@ -164,7 +164,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1e.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1e.sh index f768c7659d7..0fdb2b3b63e 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1e.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1e.sh @@ -154,7 +154,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1b.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1b.sh index 3384b085114..492d3efb804 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1b.sh @@ -143,7 
+143,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1c.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1c.sh index 5dd838a15e3..01768c3875f 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1c.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1c.sh @@ -160,7 +160,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1d.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1d.sh index 4f86691b752..bb5007f4c9f 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1d.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1d.sh @@ -151,7 +151,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1e.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1e.sh index e32c08562c6..1476ed1fd40 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1e.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1e.sh @@ -143,7 +143,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1f.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1f.sh index 2eab0285828..47f939fea1c 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1f.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1f.sh @@ -141,7 +141,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1g.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1g.sh index 64ce1f02fdd..f02025674e8 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1g.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1g.sh @@ -142,7 +142,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print 
(0.5/$xent_regularize)" | python) affine_opts="l2-regularize=0.008 dropout-proportion=0.0 dropout-per-dim-continuous=true" tdnnf_opts="l2-regularize=0.008 dropout-proportion=0.0 bypass-scale=0.66" linear_opts="l2-regularize=0.008 orthonormal-constraint=-1.0" diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1a.sh index 8f0be130e27..b03da27e760 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -156,7 +156,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh index fef021c6482..e896a7867b3 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh @@ -169,7 +169,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1c.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1c.sh index d05ae15dfec..00f72fab796 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1c.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1c.sh @@ -160,7 +160,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh index 29d8e69b04c..80a9ed1c4d0 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh @@ -165,7 +165,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh index db3fde91656..031978f878a 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh @@ -213,7 +213,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print 
(0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh index f6a1d49890d..c60b8f7fefc 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh @@ -167,7 +167,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh index ff2c302fdf6..2d2048a6869 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh @@ -170,7 +170,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1h.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1h.sh index d4cb5e85657..a074e128270 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1h.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1h.sh @@ -168,7 +168,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1i.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1i.sh index 40b1bf7f54a..3bfe175806f 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1i.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1i.sh @@ -189,7 +189,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1j.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1j.sh index 838f49f977f..acbef783823 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1j.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1j.sh @@ -186,7 +186,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1k.sh 
b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1k.sh index b1abfdcf525..173be863608 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1k.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1k.sh @@ -184,7 +184,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) # note: the value of the dropout-proportion is not important, as it's # controlled by the dropout schedule; what's important is that we set it. diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1l.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1l.sh index ef151d72875..94955d0472c 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1l.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1l.sh @@ -174,7 +174,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) # note: the value of the dropout-proportion is not important, as it's # controlled by the dropout schedule; what's important is that we set it. diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1m.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1m.sh index c2aac3f6e20..efd3bc98725 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1m.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1m.sh @@ -174,7 +174,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) # note: the value of the dropout-proportion is not important, as it's # controlled by the dropout schedule; what's important is that we set it. diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1n.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1n.sh index ed6cb66957d..c0559e8d389 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1n.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1n.sh @@ -185,7 +185,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) # note: the value of the dropout-proportion is not important, as it's # controlled by the dropout schedule; what's important is that we set it. 
diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1o.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1o.sh index 8a4b7468058..5a6dbaef8af 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1o.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1o.sh @@ -189,7 +189,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) # note: the value of the dropout-proportion is not important, as it's # controlled by the dropout schedule; what's important is that we set it. diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1r.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1r.sh index 8f80a6885ca..dd38d56759f 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1r.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1r.sh @@ -187,7 +187,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) tdnn_opts='ng-affine-options="update-period=1"' lstmp_opts='ng-affine-options="update-period=1" decay-time=20' diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1s.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1s.sh index ef1c7fc196f..1378d2d176d 100644 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1s.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1s.sh @@ -151,7 +151,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1t.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1t.sh index 19479de41aa..3c4882ec2c6 100644 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1t.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1t.sh @@ -152,7 +152,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1u.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1u.sh index 85c0e4a0661..23ea14ae151 100644 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1u.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1u.sh @@ -145,7 +145,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git 
a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1v.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1v.sh index e0431a83ceb..7c44d963504 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1v.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1v.sh @@ -149,7 +149,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_1a.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_1a.sh index e1543c0120f..042ef346578 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_1a.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_1a.sh @@ -159,7 +159,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_bs_1a.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_bs_1a.sh index d08a7ad5e86..905e1845183 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_bs_1a.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_bs_1a.sh @@ -163,7 +163,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_bs_1b.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_bs_1b.sh index d256150484b..7bd96e7d82c 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_bs_1b.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_bs_1b.sh @@ -150,7 +150,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1a.sh b/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1a.sh index 40cdcb5b5ff..1204ff6ce4c 100755 --- a/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1a.sh @@ -143,7 +143,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1b.sh 
b/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1b.sh index 9144508e62b..f06ba3fa195 100755 --- a/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1b.sh @@ -148,7 +148,7 @@ if [ $stage -le 17 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/tedlium/s5_r3/local/ted_download_lm.sh b/egs/tedlium/s5_r3/local/ted_download_lm.sh index ad833555b5f..6118876a0ab 100755 --- a/egs/tedlium/s5_r3/local/ted_download_lm.sh +++ b/egs/tedlium/s5_r3/local/ted_download_lm.sh @@ -13,4 +13,4 @@ echo "$0: downloading Tedlium 4 gram language models (it won't re-download if it wget --continue http://kaldi-asr.org/models/5/4gram_small.arpa.gz -P data/local/local_lm/data/arpa || exit 1 wget --continue http://kaldi-asr.org/models/5/4gram_big.arpa.gz -P data/local/local_lm/data/arpa || exit 1 -exit 0 \ No newline at end of file +exit 0 diff --git a/egs/tunisian_msa/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/tunisian_msa/s5/local/chain/tuning/run_tdnn_1a.sh index a2662584549..ab68ba6fb68 100755 --- a/egs/tunisian_msa/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/tunisian_msa/s5/local/chain/tuning/run_tdnn_1a.sh @@ -142,7 +142,7 @@ if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) affine_opts="l2-regularize=0.03 dropout-proportion=0.0 dropout-per-dim-continuous=true" tdnnf_opts="l2-regularize=0.03 dropout-proportion=0.0 bypass-scale=0.66" linear_opts="l2-regularize=0.03 orthonormal-constraint=-1.0" diff --git a/egs/uw3/v1/local/chain/run_cnn_1a.sh b/egs/uw3/v1/local/chain/run_cnn_1a.sh index 582bfc90105..e3548609da7 100755 --- a/egs/uw3/v1/local/chain/run_cnn_1a.sh +++ b/egs/uw3/v1/local/chain/run_cnn_1a.sh @@ -130,7 +130,7 @@ if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) common1="required-time-offsets=0 height-offsets=-2,-1,0,1,2 num-filters-out=12" mkdir -p $dir/configs diff --git a/egs/vystadial_cz/s5b/local/chain/tuning/run_tdnn_1a.sh b/egs/vystadial_cz/s5b/local/chain/tuning/run_tdnn_1a.sh index 496ee5e84ca..844ccf80677 100755 --- a/egs/vystadial_cz/s5b/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/vystadial_cz/s5b/local/chain/tuning/run_tdnn_1a.sh @@ -148,7 +148,7 @@ if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.05 dropout-per-dim-continuous=true" output_opts="l2-regularize=0.02 bottleneck-dim=192" diff --git a/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1a.sh b/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1a.sh index ceca428f5c1..e656b67e529 100755 --- 
a/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1a.sh +++ b/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1a.sh @@ -167,7 +167,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1b.sh b/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1b.sh index a3a747ed743..9db76e94430 100755 --- a/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1b.sh +++ b/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1b.sh @@ -170,7 +170,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1c.sh b/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1c.sh index dc47681593f..36ec5bb61af 100755 --- a/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1c.sh +++ b/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1c.sh @@ -155,7 +155,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.01" ivector_affine_opts="l2-regularize=0.01" tdnn_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim-continuous=true" diff --git a/egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh index 10a9c608811..8d44db6f917 100755 --- a/egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh @@ -183,7 +183,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/wsj/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/wsj/s5/local/chain/tuning/run_tdnn_1b.sh index a2bb7e93388..544b9b04a0a 100755 --- a/egs/wsj/s5/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/wsj/s5/local/chain/tuning/run_tdnn_1b.sh @@ -158,7 +158,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/wsj/s5/local/chain/tuning/run_tdnn_1c.sh b/egs/wsj/s5/local/chain/tuning/run_tdnn_1c.sh index 7dc30ecf8fe..b268ed7feda 100755 --- a/egs/wsj/s5/local/chain/tuning/run_tdnn_1c.sh +++ b/egs/wsj/s5/local/chain/tuning/run_tdnn_1c.sh @@ -159,7 +159,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - 
learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/wsj/s5/local/chain/tuning/run_tdnn_1d.sh b/egs/wsj/s5/local/chain/tuning/run_tdnn_1d.sh index 603e0f064b9..d1a7f9d0663 100755 --- a/egs/wsj/s5/local/chain/tuning/run_tdnn_1d.sh +++ b/egs/wsj/s5/local/chain/tuning/run_tdnn_1d.sh @@ -159,7 +159,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/wsj/s5/local/chain/tuning/run_tdnn_1e.sh b/egs/wsj/s5/local/chain/tuning/run_tdnn_1e.sh index 9808e274d83..e20069fbfa1 100755 --- a/egs/wsj/s5/local/chain/tuning/run_tdnn_1e.sh +++ b/egs/wsj/s5/local/chain/tuning/run_tdnn_1e.sh @@ -167,7 +167,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.01" output_opts="l2-regularize=0.0025" diff --git a/egs/wsj/s5/local/chain/tuning/run_tdnn_1f.sh b/egs/wsj/s5/local/chain/tuning/run_tdnn_1f.sh index e3d13ac1f65..86df0779841 100755 --- a/egs/wsj/s5/local/chain/tuning/run_tdnn_1f.sh +++ b/egs/wsj/s5/local/chain/tuning/run_tdnn_1f.sh @@ -161,7 +161,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) opts="l2-regularize=0.01" output_opts="l2-regularize=0.005 bottleneck-dim=320" diff --git a/egs/wsj/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/wsj/s5/local/chain/tuning/run_tdnn_lstm_1a.sh index 4b752a55a4b..6e4f220c1f2 100755 --- a/egs/wsj/s5/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/wsj/s5/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -181,7 +181,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/wsj/s5/local/chain/tuning/run_tdnn_lstm_1b.sh b/egs/wsj/s5/local/chain/tuning/run_tdnn_lstm_1b.sh index 51fefb9ca88..2d113e58a93 100755 --- a/egs/wsj/s5/local/chain/tuning/run_tdnn_lstm_1b.sh +++ b/egs/wsj/s5/local/chain/tuning/run_tdnn_lstm_1b.sh @@ -473,7 +473,7 @@ if [ $stage -le 15 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) tdnn_opts="l2-regularize=0.01" output_opts="l2-regularize=0.005 bottleneck-dim=256" lstm_opts="l2-regularize=0.005 self-scale=2.0" diff --git a/egs/yomdle_fa/v1/local/chain/run_cnn_e2eali_1b.sh 
b/egs/yomdle_fa/v1/local/chain/run_cnn_e2eali_1b.sh index e7c125d16de..700b57d9fce 100755 --- a/egs/yomdle_fa/v1/local/chain/run_cnn_e2eali_1b.sh +++ b/egs/yomdle_fa/v1/local/chain/run_cnn_e2eali_1b.sh @@ -131,7 +131,7 @@ if [ $stage -le 4 ]; then mkdir -p $dir echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.075" tdnn_opts="l2-regularize=0.075" output_opts="l2-regularize=0.1" diff --git a/egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1a.sh index c43d7c669c1..03333f6d229 100755 --- a/egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1a.sh +++ b/egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -127,7 +127,7 @@ if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.075" tdnn_opts="l2-regularize=0.075" output_opts="l2-regularize=0.1" diff --git a/egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1b.sh index 8fca9235f46..fd9cdc8921d 100755 --- a/egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1b.sh +++ b/egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1b.sh @@ -124,7 +124,7 @@ if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.03 dropout-proportion=0.0" tdnn_opts="l2-regularize=0.03" output_opts="l2-regularize=0.04" diff --git a/egs/yomdle_korean/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1a.sh b/egs/yomdle_korean/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1a.sh index 654880fcf59..f6b2c1bac42 100755 --- a/egs/yomdle_korean/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1a.sh +++ b/egs/yomdle_korean/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1a.sh @@ -143,7 +143,7 @@ if [ $stage -le 11 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $sup_tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.075" tdnn_opts="l2-regularize=0.075" output_opts="l2-regularize=0.1" diff --git a/egs/yomdle_korean/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh b/egs/yomdle_korean/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh index eb688151665..8185fa2645d 100755 --- a/egs/yomdle_korean/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh +++ b/egs/yomdle_korean/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh @@ -142,7 +142,7 @@ if [ $stage -le 11 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $sup_tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | 
python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.03 dropout-proportion=0.0" tdnn_opts="l2-regularize=0.03" output_opts="l2-regularize=0.04" diff --git a/egs/yomdle_russian/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/yomdle_russian/v1/local/chain/tuning/run_cnn_e2eali_1a.sh index 7301db33d85..cd582472993 100755 --- a/egs/yomdle_russian/v1/local/chain/tuning/run_cnn_e2eali_1a.sh +++ b/egs/yomdle_russian/v1/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -119,7 +119,7 @@ if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.03 dropout-proportion=0.0" tdnn_opts="l2-regularize=0.03" output_opts="l2-regularize=0.04" diff --git a/egs/yomdle_tamil/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/yomdle_tamil/v1/local/chain/tuning/run_cnn_e2eali_1a.sh index c43d7c669c1..03333f6d229 100755 --- a/egs/yomdle_tamil/v1/local/chain/tuning/run_cnn_e2eali_1a.sh +++ b/egs/yomdle_tamil/v1/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -127,7 +127,7 @@ if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.075" tdnn_opts="l2-regularize=0.075" output_opts="l2-regularize=0.1" diff --git a/egs/yomdle_tamil/v1/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/yomdle_tamil/v1/local/chain/tuning/run_cnn_e2eali_1b.sh index 9a12a5a9e1e..fb15ce10dde 100755 --- a/egs/yomdle_tamil/v1/local/chain/tuning/run_cnn_e2eali_1b.sh +++ b/egs/yomdle_tamil/v1/local/chain/tuning/run_cnn_e2eali_1b.sh @@ -125,7 +125,7 @@ if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.03 dropout-proportion=0.0" tdnn_opts="l2-regularize=0.03" output_opts="l2-regularize=0.04" diff --git a/egs/yomdle_tamil/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1a.sh b/egs/yomdle_tamil/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1a.sh index 654880fcf59..f6b2c1bac42 100755 --- a/egs/yomdle_tamil/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1a.sh +++ b/egs/yomdle_tamil/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1a.sh @@ -143,7 +143,7 @@ if [ $stage -le 11 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $sup_tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.075" tdnn_opts="l2-regularize=0.075" output_opts="l2-regularize=0.1" diff --git a/egs/yomdle_tamil/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh b/egs/yomdle_tamil/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh index 08641f6a38a..17d59642b05 100755 --- a/egs/yomdle_tamil/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh +++ 
b/egs/yomdle_tamil/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh @@ -142,7 +142,7 @@ if [ $stage -le 11 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $sup_tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.03 dropout-proportion=0.0" tdnn_opts="l2-regularize=0.03" output_opts="l2-regularize=0.04" diff --git a/egs/yomdle_zh/v1/local/chain/run_cnn_e2eali_1b.sh b/egs/yomdle_zh/v1/local/chain/run_cnn_e2eali_1b.sh index 4183aa74587..0a4e00d7aed 100755 --- a/egs/yomdle_zh/v1/local/chain/run_cnn_e2eali_1b.sh +++ b/egs/yomdle_zh/v1/local/chain/run_cnn_e2eali_1b.sh @@ -130,7 +130,7 @@ if [ $stage -le 4 ]; then mkdir -p $dir echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) cnn_opts="l2-regularize=0.075" tdnn_opts="l2-regularize=0.075" output_opts="l2-regularize=0.1" diff --git a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh index 55e046dd55a..14b9a8d6c8e 100755 --- a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh @@ -156,7 +156,7 @@ if [ $stage -le 11 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) tdnn_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim-continuous=true" tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" diff --git a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh index 44110888519..28b36243ba3 100755 --- a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh +++ b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh @@ -158,7 +158,7 @@ if [ $stage -le 11 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) gru_opts="dropout-per-frame=true dropout-proportion=0.0" mkdir -p $dir/configs From 1f068cde87755810d874de43040391bd1dc0ac4b Mon Sep 17 00:00:00 2001 From: Peter Smit Date: Sun, 17 Mar 2019 20:54:32 +0100 Subject: [PATCH 087/235] [scripts] Bug-fix for removing deleted words (#3116) The type of --max-deleted-words-kept-when-merging in segment_ctm_edits.py was a string, which prevented the mechanism from working altogether. 
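The root cause described above is an argparse declaration whose declared type does not match how the value is used: option values given on the command line always arrive as strings, and argparse only converts them when the declared type is a callable such as int, so numeric thresholds compared against a string-valued option misbehave (Python 2 silently allows the comparison but orders by type rather than value, Python 3 raises a TypeError). A minimal sketch of the difference, using a hypothetical option name rather than the real one; the actual fix (type changed from str to int) follows below:

    import argparse

    parser = argparse.ArgumentParser()
    # With type=str, a value passed on the command line stays the string "2",
    # so a later check like "num_deleted <= max_kept" no longer behaves as a
    # numeric comparison.  type=int makes argparse do the conversion.
    parser.add_argument("--max-kept", type=int, default=1)

    args = parser.parse_args(["--max-kept", "2"])
    assert isinstance(args.max_kept, int) and args.max_kept == 2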
--- egs/wsj/s5/steps/cleanup/internal/segment_ctm_edits.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/wsj/s5/steps/cleanup/internal/segment_ctm_edits.py b/egs/wsj/s5/steps/cleanup/internal/segment_ctm_edits.py index e571fefb84c..2ea8f5f6070 100755 --- a/egs/wsj/s5/steps/cleanup/internal/segment_ctm_edits.py +++ b/egs/wsj/s5/steps/cleanup/internal/segment_ctm_edits.py @@ -70,7 +70,7 @@ help="""Minimum duration of silence or non-scored word to be considered a viable split point when truncating based on junk proportion.""") -parser.add_argument("--max-deleted-words-kept-when-merging", type = str, default = 1, +parser.add_argument("--max-deleted-words-kept-when-merging", type = int, default = 1, help = "When merging segments that are found to be overlapping or " "adjacent after all other processing, keep in the transcript the " "reference words that were deleted between the segments [if any] " From 8d60ee3d8b29b89fd2e4e800afde1627bcb6fecf Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Sun, 17 Mar 2019 19:22:08 -0400 Subject: [PATCH 088/235] [scripts] Add fix regarding num-jobs for segment_long_utterances*.sh(#3130) --- egs/wsj/s5/steps/cleanup/segment_long_utterances.sh | 9 ++++++++- .../s5/steps/cleanup/segment_long_utterances_nnet3.sh | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/egs/wsj/s5/steps/cleanup/segment_long_utterances.sh b/egs/wsj/s5/steps/cleanup/segment_long_utterances.sh index c7e50ea165e..7a16bdcdb12 100755 --- a/egs/wsj/s5/steps/cleanup/segment_long_utterances.sh +++ b/egs/wsj/s5/steps/cleanup/segment_long_utterances.sh @@ -174,10 +174,17 @@ if [ $stage -le 3 ]; then cp $srcdir/phones.txt $dir 2>/dev/null || true mkdir -p $graph_dir + + n_reco=$(cat $text | wc -l) || exit 1 + nj_reco=$nj + + if [ $nj -gt $n_reco ]; then + nj_reco=$n_reco + fi # Make graphs w.r.t. to the original text (usually recording-level) steps/cleanup/make_biased_lm_graphs.sh $graph_opts \ - --nj $nj --cmd "$cmd" $text \ + --nj $nj_reco --cmd "$cmd" $text \ $lang $dir $dir/graphs if [ -z "$utt2text" ]; then # and then copy it to the sub-segments. diff --git a/egs/wsj/s5/steps/cleanup/segment_long_utterances_nnet3.sh b/egs/wsj/s5/steps/cleanup/segment_long_utterances_nnet3.sh index 751200bdf83..f0df1e7730c 100755 --- a/egs/wsj/s5/steps/cleanup/segment_long_utterances_nnet3.sh +++ b/egs/wsj/s5/steps/cleanup/segment_long_utterances_nnet3.sh @@ -235,10 +235,17 @@ if [ $stage -le 3 ]; then mkdir -p $graph_dir + n_reco=$(cat $text | wc -l) || exit 1 + nj_reco=$nj + + if [ $nj -gt $n_reco ]; then + nj_reco=$n_reco + fi + # Make graphs w.r.t. to the original text (usually recording-level) steps/cleanup/make_biased_lm_graphs.sh $graph_opts \ --scale-opts "$scale_opts" \ - --nj $nj --cmd "$cmd" $text \ + --nj $nj_reco --cmd "$cmd" $text \ $lang $dir $dir/graphs if [ -z "$utt2text" ]; then # and then copy it to the sub-segments. 
From 6595b429f3e743f779f8ef7f3e9f605bb6bd8105 Mon Sep 17 00:00:00 2001 From: saikiranvalluri Date: Mon, 18 Mar 2019 14:56:40 +0000 Subject: [PATCH 089/235] Added steps for generating POCOLM ARPA file --- .../s5_gigaword/local/train_pocolm.sh | 7 +++++-- egs/fisher_callhome_spanish/s5_gigaword/run.sh | 4 ++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh index 964dd3bbcc5..b8b3ca35ef9 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh @@ -43,8 +43,11 @@ if [ $stage -le -1 ];then python local/get_unigram_weights_vocab.py "$pocolm_dir"/lm/0_2.pocolm/ "$textdir"/unigram_weights bash local/pocolm_cust.sh --num-word "$num_words_pocolm" --lm-dir "$pocolm_dir"/lm \ --arpa-dir "$pocolm_dir"/arpa --textdir "$textdir" - - + prune_lm_dir.py --target-num-ngrams=$prune_size "$pocolm_dir"/lm/"$num_words_pocolm"_3.pocolm \ + "$pocolm_dir"/lm/"$num_words_pocolm"_3.pocolm_pruned_"$prune_size" + mkdir -p "$pocolm_dir"/arpa + format_arpa_lm.py "$pocolm_dir"/lm/"$num_words_pocolm"_3.pocolm_pruned_"$prune_size" | \ + gzip -c > "$pocolm_dir"/arpa/"$num_words_pocolm"_3_pruned_"$prune_size".arpa.gz fi diff --git a/egs/fisher_callhome_spanish/s5_gigaword/run.sh b/egs/fisher_callhome_spanish/s5_gigaword/run.sh index b63b5208138..1ad8f9f1e0b 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/run.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/run.sh @@ -6,6 +6,7 @@ stage=-1 lmstage=-2 +addtraintext=true num_words_pocolm=110000 train_sgmm2=false @@ -95,6 +96,9 @@ if [ $stage -le 0 ]; then cut -d " " -f 2- data/train/text > "$rnnlm_workdir"/text_lm/train.txt cut -d " " -f 2- data/dev2/text > "$rnnlm_workdir"/text_lm/dev.txt # For RNNLM and POCOLM training we use dev2/text as dev file. cp "$rnnlm_workdir"/normalised_gigaword_corpus/text_normalized "$rnnlm_workdir"/text_lm/spanish_gigaword_normalised.txt + if $addtraintext; then + cat "$rnnlm_workdir"/text_lm/train.txt >> "$rnnlm_workdir"/text_lm/spanish_gigaword_normalised.txt + fi fi if [ $stage -le 1 ]; then From 7fb716aa0f56480af31514c7e362db5c9f787fd4 Mon Sep 17 00:00:00 2001 From: "Jan \"yenda\" Trmal" Date: Mon, 18 Mar 2019 17:19:20 -0400 Subject: [PATCH 090/235] [src] Enable allow_{upsample,downsample} with online features (#3139) --- src/feat/online-feature.cc | 98 ++++++++++++++++++++++++++++---------- src/feat/online-feature.h | 11 +++-- src/feat/resample.h | 4 ++ 3 files changed, 85 insertions(+), 28 deletions(-) diff --git a/src/feat/online-feature.cc b/src/feat/online-feature.cc index 90170a266e5..a60e7fb8d61 100644 --- a/src/feat/online-feature.cc +++ b/src/feat/online-feature.cc @@ -24,7 +24,7 @@ namespace kaldi { -RecyclingVector::RecyclingVector(int items_to_hold) : +RecyclingVector::RecyclingVector(int items_to_hold): items_to_hold_(items_to_hold == 0 ? 
-1 : items_to_hold), first_available_index_(0) { } @@ -38,7 +38,8 @@ RecyclingVector::~RecyclingVector() { Vector *RecyclingVector::At(int index) const { if (index < first_available_index_) { KALDI_ERR << "Attempted to retrieve feature vector that was " - "already removed by the RecyclingVector (index = " << index << "; " + "already removed by the RecyclingVector (index = " + << index << "; " << "first_available_index = " << first_available_index_ << "; " << "size = " << Size() << ")"; } @@ -59,43 +60,93 @@ int RecyclingVector::Size() const { return first_available_index_ + items_.size(); } - -template +template void OnlineGenericBaseFeature::GetFrame(int32 frame, VectorBase *feat) { feat->CopyFromVec(*(features_.At(frame))); }; -template +template OnlineGenericBaseFeature::OnlineGenericBaseFeature( const typename C::Options &opts): computer_(opts), window_function_(computer_.GetFrameOptions()), features_(opts.frame_opts.max_feature_vectors), input_finished_(false), waveform_offset_(0) { } -template -void OnlineGenericBaseFeature::AcceptWaveform(BaseFloat sampling_rate, - const VectorBase &waveform) { + +template +void OnlineGenericBaseFeature::MaybeCreateResampler( + BaseFloat sampling_rate) { BaseFloat expected_sampling_rate = computer_.GetFrameOptions().samp_freq; - if (sampling_rate != expected_sampling_rate) + + if (resampler_ != nullptr) { + KALDI_ASSERT(resampler_->GetInputSamplingRate() == sampling_rate); + KALDI_ASSERT(resampler_->GetOutputSamplingRate() == expected_sampling_rate); + } else if (((sampling_rate > expected_sampling_rate) && + !computer_.GetFrameOptions().allow_downsample) || + ((sampling_rate > expected_sampling_rate) && + !computer_.GetFrameOptions().allow_upsample)) { + resampler_.reset(new LinearResample( + sampling_rate, expected_sampling_rate, + std::min(sampling_rate / 2, expected_sampling_rate / 2), 6)); + } else if (sampling_rate != expected_sampling_rate) { KALDI_ERR << "Sampling frequency mismatch, expected " - << expected_sampling_rate << ", got " << sampling_rate; - if (waveform.Dim() == 0) + << expected_sampling_rate << ", got " << sampling_rate + << "\nPerhaps you want to use the options " + "--allow_{upsample,downsample}"; + } +} + +template +void OnlineGenericBaseFeature::InputFinished() { + if (resampler_ != nullptr) { + Vector appended_wave; + Vector resampled_wave; + resampler_->Resample(appended_wave, true, &resampled_wave); + + if (waveform_remainder_.Dim() != 0) + appended_wave.Range(0, waveform_remainder_.Dim()) + .CopyFromVec(waveform_remainder_); + appended_wave.Range(waveform_remainder_.Dim(), resampled_wave.Dim()) + .CopyFromVec(resampled_wave); + waveform_remainder_.Swap(&appended_wave); + } + input_finished_ = true; + ComputeFeatures(); +} + +template +void OnlineGenericBaseFeature::AcceptWaveform( + BaseFloat sampling_rate, const VectorBase &original_waveform) { + if (original_waveform.Dim() == 0) return; // Nothing to do. if (input_finished_) KALDI_ERR << "AcceptWaveform called after InputFinished() was called."; - // append 'waveform' to 'waveform_remainder_.' 
- Vector appended_wave(waveform_remainder_.Dim() + waveform.Dim()); + + Vector appended_wave; + Vector resampled_wave; + + const VectorBase *waveform; + + MaybeCreateResampler(sampling_rate); + if (resampler_ == nullptr) { + waveform = &original_waveform; + } else { + resampler_->Resample(original_waveform, false, &resampled_wave); + waveform = &resampled_wave; + } + + appended_wave.Resize(waveform_remainder_.Dim() + waveform->Dim()); if (waveform_remainder_.Dim() != 0) - appended_wave.Range(0, waveform_remainder_.Dim()).CopyFromVec( - waveform_remainder_); - appended_wave.Range(waveform_remainder_.Dim(), waveform.Dim()).CopyFromVec( - waveform); + appended_wave.Range(0, waveform_remainder_.Dim()) + .CopyFromVec(waveform_remainder_); + appended_wave.Range(waveform_remainder_.Dim(), waveform->Dim()) + .CopyFromVec(*waveform); waveform_remainder_.Swap(&appended_wave); ComputeFeatures(); } -template +template void OnlineGenericBaseFeature::ComputeFeatures() { const FrameExtractionOptions &frame_opts = computer_.GetFrameOptions(); int64 num_samples_total = waveform_offset_ + waveform_remainder_.Dim(); @@ -145,7 +196,6 @@ template class OnlineGenericBaseFeature; template class OnlineGenericBaseFeature; template class OnlineGenericBaseFeature; - OnlineCmvnState::OnlineCmvnState(const OnlineCmvnState &other): speaker_cmvn_stats(other.speaker_cmvn_stats), global_cmvn_stats(other.global_cmvn_stats), @@ -173,8 +223,6 @@ void OnlineCmvnState::Read(std::istream &is, bool binary) { ExpectToken(is, binary, ""); } - - OnlineCmvn::OnlineCmvn(const OnlineCmvnOptions &opts, const OnlineCmvnState &cmvn_state, OnlineFeatureInterface *src): @@ -328,7 +376,8 @@ void OnlineCmvn::SmoothOnlineCmvnStats(const MatrixBase &speaker_stats, // If count exceeded cmn_window it would be an error in how "window_stats" // was accumulated. KALDI_ASSERT(cur_count <= 1.001 * opts.cmn_window); - if (cur_count >= opts.cmn_window) return; + if (cur_count >= opts.cmn_window) + return; if (speaker_stats.NumRows() != 0) { // if we have speaker stats.. double count_from_speaker = opts.cmn_window - cur_count, speaker_count = speaker_stats(0, dim); @@ -341,7 +390,8 @@ void OnlineCmvn::SmoothOnlineCmvnStats(const MatrixBase &speaker_stats, speaker_stats); cur_count = (*stats)(0, dim); } - if (cur_count >= opts.cmn_window) return; + if (cur_count >= opts.cmn_window) + return; if (global_stats.NumRows() != 0) { double count_from_global = opts.cmn_window - cur_count, global_count = global_stats(0, dim); @@ -433,7 +483,7 @@ void OnlineCmvn::SetState(const OnlineCmvnState &cmvn_state) { int32 OnlineSpliceFrames::NumFramesReady() const { int32 num_frames = src_->NumFramesReady(); - if (num_frames > 0 && src_->IsLastFrame(num_frames-1)) + if (num_frames > 0 && src_->IsLastFrame(num_frames - 1)) return num_frames; else return std::max(0, num_frames - right_context_); diff --git a/src/feat/online-feature.h b/src/feat/online-feature.h index d47a6b13e9b..4f66ffef2ff 100644 --- a/src/feat/online-feature.h +++ b/src/feat/online-feature.h @@ -113,10 +113,7 @@ class OnlineGenericBaseFeature: public OnlineBaseFeature { // more waveform. This will help flush out the last frame or two // of features, in the case where snip-edges == false; it also // affects the return value of IsLastFrame(). 
- virtual void InputFinished() { - input_finished_ = true; - ComputeFeatures(); - } + virtual void InputFinished(); private: // This function computes any additional feature frames that it is possible to @@ -127,8 +124,14 @@ class OnlineGenericBaseFeature: public OnlineBaseFeature { // waveform_remainder_ while incrementing waveform_offset_ by the same amount. void ComputeFeatures(); + void MaybeCreateResampler(BaseFloat sampling_rate); + C computer_; // class that does the MFCC or PLP or filterbank computation + // resampler in cases when the input sampling frequency is not equal to + // the expected sampling rate + std::unique_ptr resampler_; + FeatureWindowFunction window_function_; // features_ is the Mfcc or Plp or Fbank features that we have already computed. diff --git a/src/feat/resample.h b/src/feat/resample.h index ecac2ba7566..e0b4688c99b 100644 --- a/src/feat/resample.h +++ b/src/feat/resample.h @@ -185,6 +185,10 @@ class LinearResample { /// Resample(x, y, true) for the last piece. Call it unnecessarily between /// signals will not do any harm. void Reset(); + + //// Return the input and output sampling rates (for checks, for example) + inline int32 GetInputSamplingRate() { return samp_rate_in_; } + inline int32 GetOutputSamplingRate() { return samp_rate_out_; } private: /// This function outputs the number of output samples we will output /// for a signal with "input_num_samp" input samples. If flush == true, From 80c14376759689347f7c375dce492dad12a385f5 Mon Sep 17 00:00:00 2001 From: Andrey Dorozhkin Date: Tue, 19 Mar 2019 18:48:05 +0300 Subject: [PATCH 091/235] [src] Fix bad assert in fstmakecontextsyms (#3142) --- src/fstext/context-fst.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fstext/context-fst.cc b/src/fstext/context-fst.cc index 9936a398e37..1e41adc021f 100644 --- a/src/fstext/context-fst.cc +++ b/src/fstext/context-fst.cc @@ -345,7 +345,7 @@ SymbolTable *CreateILabelInfoSymbolTable(const vector > &info, const SymbolTable &phones_symtab, std::string separator, std::string initial_disambig) { // e.g. separator = "/", initial-disambig="#-1" - KALDI_ASSERT(!info.empty() && !info[0].empty()); + KALDI_ASSERT(!info.empty() && info[0].empty()); SymbolTable *ans = new SymbolTable("ilabel-info-symtab"); int64 s = ans->AddSymbol(phones_symtab.Find(static_cast(0))); assert(s == 0); From 0d6ead55be92b883c0b0eb7f9bfebb1e5ab6784d Mon Sep 17 00:00:00 2001 From: David Zurow Date: Tue, 19 Mar 2019 11:53:42 -0400 Subject: [PATCH 092/235] [src] Fix to "Fixes to grammar-fst & LM-disambig symbols" (#3000) (#3143) --- src/decoder/grammar-fst.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/decoder/grammar-fst.cc b/src/decoder/grammar-fst.cc index ab1a8142c1d..1b79e7b5521 100644 --- a/src/decoder/grammar-fst.cc +++ b/src/decoder/grammar-fst.cc @@ -706,7 +706,7 @@ bool GrammarFstPreparer::IsEntryState(StateId s) const { // we check that at least one has label with nonterminal equal to #nonterm_begin... // in fact they will all have this value if at least one does, and this was checked // in NeedEpsilons(). 
- if (nonterminal == kNontermBegin) + if (nonterminal == GetPhoneSymbolFor(kNontermBegin)) return true; } return false; From 338b5868c78fbae5dd4e3c4b74ac59675a1513f3 Mon Sep 17 00:00:00 2001 From: "Jan \"yenda\" Trmal" Date: Tue, 19 Mar 2019 15:13:18 -0400 Subject: [PATCH 093/235] [build] Make sure PaUtils exported from portaudio (#3144) --- src/onlinebin/online-gmm-decode-faster.cc | 8 ++++---- tools/extras/install_portaudio.sh | 18 ++++++++++-------- tools/extras/portaudio.patch | 21 +++++++++++++++++++++ 3 files changed, 35 insertions(+), 12 deletions(-) create mode 100644 tools/extras/portaudio.patch diff --git a/src/onlinebin/online-gmm-decode-faster.cc b/src/onlinebin/online-gmm-decode-faster.cc index 8ad86a489d4..46904dbc59e 100644 --- a/src/onlinebin/online-gmm-decode-faster.cc +++ b/src/onlinebin/online-gmm-decode-faster.cc @@ -70,7 +70,7 @@ int main(int argc, char *argv[]) { OnlineFeatureMatrixOptions feature_reading_opts; decoder_opts.Register(&po, true); feature_reading_opts.Register(&po); - + po.Register("left-context", &left_context, "Number of frames of left context"); po.Register("right-context", &right_context, "Number of frames of right context"); po.Register("acoustic-scale", &acoustic_scale, @@ -86,7 +86,7 @@ int main(int argc, char *argv[]) { po.PrintUsage(); return 1; } - + std::string model_rxfilename = po.GetArg(1), fst_rxfilename = po.GetArg(2), word_syms_filename = po.GetArg(3), @@ -151,7 +151,7 @@ int main(int argc, char *argv[]) { opts.order = kDeltaOrder; feat_transform = new OnlineDeltaInput(opts, &cmn_input); } - + // feature_reading_opts contains number of retries, batch size. OnlineFeatureMatrix feature_matrix(feature_reading_opts, feat_transform); @@ -200,4 +200,4 @@ int main(int argc, char *argv[]) { return -1; } #endif -} // main() +} // main() diff --git a/tools/extras/install_portaudio.sh b/tools/extras/install_portaudio.sh index 58797f554e8..36c95047a7f 100755 --- a/tools/extras/install_portaudio.sh +++ b/tools/extras/install_portaudio.sh @@ -14,10 +14,10 @@ #See the Apache 2 License for the specific language governing permissions and #limitations under the License. # -#This script attempts to install port audio, which is needed for the run-on -#decoding stuff. Portaudio enables the decoder to grab a live audio stream -#from the soundcard. I tested portaudio on Linux (RedHat and Suse Linux) and -#on MacOS 10.7. On Linux, it compiles out of the box. For MacOS 10.7, +#This script attempts to install port audio, which is needed for the run-on +#decoding stuff. Portaudio enables the decoder to grab a live audio stream +#from the soundcard. I tested portaudio on Linux (RedHat and Suse Linux) and +#on MacOS 10.7. On Linux, it compiles out of the box. For MacOS 10.7, #it is necessary to edit the Makefile (this script tries to do that). #The script will remove all occurances of # @@ -29,8 +29,8 @@ #also, it seems that one has to uncomment the inclusion of AudioToolbox in #include/pa_mac_core.h # -#All this should make it compile fine for x86_64 under MacOS 10.7 -#(always assuming that you installed XCode, wget and +#All this should make it compile fine for x86_64 under MacOS 10.7 +#(always assuming that you installed XCode, wget and #the Linux environment stuff on MacOS) echo "****() Installing portaudio" @@ -38,7 +38,7 @@ echo "****() Installing portaudio" if [ ! -e pa_stable_v19_20111121.tgz ]; then echo "Could not find portaudio tarball pa_stable_v19_20111121.tgz" echo "Trying to download it via wget!" - + if ! 
which wget >&/dev/null; then echo "This script requires you to first install wget" echo "You can also just download pa_stable_v19_20111121.tgz from" @@ -81,6 +81,8 @@ if [ -z "$MACOS" ]; then echo "${pa_patch}" | patch -p0 Makefile.in fi +patch -p0 Makefile.in < ../extras/portaudio.patch +autoconf ./configure --prefix=`pwd`/install --with-pic perl -i -pe 's:src/common/pa_ringbuffer.o:: if /^OTHER_OBJS\s*=/' Makefile @@ -93,7 +95,7 @@ if [ "$MACOS" != "" ]; then mv include/pa_mac_core.h include/pa_mac_core.h.bck cat include/pa_mac_core.h.bck \ | sed 's/\/\/\#include \/#include \/g' \ - > include/pa_mac_core.h + > include/pa_mac_core.h fi make diff --git a/tools/extras/portaudio.patch b/tools/extras/portaudio.patch new file mode 100644 index 00000000000..9fc201f9278 --- /dev/null +++ b/tools/extras/portaudio.patch @@ -0,0 +1,21 @@ +diff --git a/Makefile.in b/Makefile.in +index 24129a3..61a3952 100644 +--- a/Makefile.in ++++ b/Makefile.in +@@ -44,7 +44,7 @@ PALIB = libportaudio.la + PAINC = include/portaudio.h + + PA_LDFLAGS = $(LDFLAGS) $(SHARED_FLAGS) -rpath $(libdir) -no-undefined \ +- -export-symbols-regex "(Pa|PaMacCore|PaJack|PaAlsa|PaAsio|PaOSS)_.*" \ ++ -export-symbols-regex "(Pa|PaUtil|PaMacCore|PaJack|PaAlsa|PaAsio|PaOSS)_.*" \ + -version-info $(LT_CURRENT):$(LT_REVISION):$(LT_AGE) + + COMMON_OBJS = \ +@@ -57,6 +57,7 @@ COMMON_OBJS = \ + src/common/pa_process.o \ + src/common/pa_stream.o \ + src/common/pa_trace.o \ ++ src/common/pa_ringbuffer.o \ + src/hostapi/skeleton/pa_hostapi_skeleton.o + + LOOPBACK_OBJS = \ From 73720e6399197fb88b3e76655a53bbbade2b1d8f Mon Sep 17 00:00:00 2001 From: Karel Vesely Date: Wed, 20 Mar 2019 02:17:17 +0100 Subject: [PATCH 094/235] [src] cudamatrix: fixing a synchronization bug in 'normalize-per-row' (#3145) was only apparent using large matrices --- src/cudamatrix/cu-kernels.cu | 19 +++++++------ src/cudamatrix/cu-math-test.cc | 49 ++++++++++++++++++++++++++++++++-- 2 files changed, 58 insertions(+), 10 deletions(-) diff --git a/src/cudamatrix/cu-kernels.cu b/src/cudamatrix/cu-kernels.cu index 17d56a05772..515412ca398 100644 --- a/src/cudamatrix/cu-kernels.cu +++ b/src/cudamatrix/cu-kernels.cu @@ -2552,9 +2552,12 @@ static void _normalize_per_row(Real *y, int y_stride, const Real *x, const int i = blockIdx.x; const int tid = threadIdx.x; const Real* x_row = x + i * x_d.stride; + typedef cub::BlockReduce BlockReduceT; __shared__ typename BlockReduceT::TempStorage temp_storage; - __shared__ Real ssum[CU1DBLOCK]; + + __shared__ Real stddev_div_target_rms; + __shared__ Real scale; // Reduce x_j^2 to CU1DBLOCK elements per row Real tsum = Real(0); @@ -2563,14 +2566,14 @@ static void _normalize_per_row(Real *y, int y_stride, const Real *x, } tsum = BlockReduceT(temp_storage).Sum(tsum); __syncthreads(); - - const Real kSquaredNormFloor = 1.3552527156068805425e-20; // 2^-66 - ssum[tid] = sqrt( - fmax(tsum / (target_rms * target_rms * x_d.cols), kSquaredNormFloor)); - - const Real stddev_div_target_rms = ssum[0]; - const Real scale = Real(1) / stddev_div_target_rms; + if (tid == 0) { + const Real kSquaredNormFloor = 1.3552527156068805425e-20; // 2^-66 + stddev_div_target_rms = sqrt( + fmax(tsum / (target_rms * target_rms * x_d.cols), kSquaredNormFloor)); + scale = Real(1) / stddev_div_target_rms; + } + __syncthreads(); // Store normalized input to output Real* y_row = y + i * y_stride; diff --git a/src/cudamatrix/cu-math-test.cc b/src/cudamatrix/cu-math-test.cc index 09255c9587b..022742ed29f 100644 --- a/src/cudamatrix/cu-math-test.cc +++ 
b/src/cudamatrix/cu-math-test.cc @@ -545,6 +545,50 @@ static void UnitTestCuMathNormalizePerRow() { } } + +template +static void UnitTestCuMathNormalizePerRow_v2() { + + int row = 128; + int col = 1024; + + Matrix Hi(row,col); + Matrix Ho(row,col); + Hi.SetRandn(); + Hi.Scale(5.0); + Hi.ApplyFloor(0.0); // like ReLU, + + CuMatrix Di(row, col); + CuMatrix Do(row, col); + Di.CopyFromMat(Hi); + + Real target_rms = 0.3456; + bool add_log_stddev = false; + const Real kSquaredNormFloor = 1.35525271560688e-20; // 2^-66 + + //gpu + cu::NormalizePerRow(Di, target_rms, add_log_stddev, &Do); + + //cpu + { + MatrixBase& in(Hi); + MatrixBase& out(Ho); + Real target_rms=0.3456; + Vector in_norm(in.NumRows()); + Real d_scaled = in.NumCols() * target_rms * target_rms; + in_norm.AddDiagMat2(1.0 / d_scaled, in, kNoTrans, 0.0); + in_norm.ApplyFloor(kSquaredNormFloor); + in_norm.ApplyPow(-0.5); + out.CopyFromMat(in); + out.MulRowsVec(in_norm); + } + + Matrix Ho2(Do); + // here the BUG was detected (by processing big-enough matrix), + AssertEqual(Ho,Ho2,0.00001); +} + + template static void UnitTestCuDiffNormalizePerRow() { for (int32 i = 0; i < 2; i++) { @@ -660,6 +704,7 @@ template void CudaMathUnitTest() { UnitTestEnsureNonzero(); UnitTestBackpropLstmNonlinearity(); UnitTestCuMathNormalizePerRow(); + UnitTestCuMathNormalizePerRow_v2(); UnitTestCuDiffNormalizePerRow(); } @@ -673,9 +718,9 @@ int main() { for (; loop < 2; loop++) { CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) - CuDevice::Instantiate().SelectGpuId("no"); // -1 means no GPU + CuDevice::Instantiate().SelectGpuId("no"); // 0 means no GPU else - CuDevice::Instantiate().SelectGpuId("yes"); // -2 .. automatic selection + CuDevice::Instantiate().SelectGpuId("yes"); // 1 .. automatic selection #endif srand(time(NULL)); kaldi::CudaMathUnitTest(); From f9276a5936a36e1a2c9761df7940347b0dfffa71 Mon Sep 17 00:00:00 2001 From: csukuangfj <5284924+csukuangfj@users.noreply.github.com> Date: Wed, 20 Mar 2019 23:21:22 +0800 Subject: [PATCH 095/235] [src] Fix typo in comment (#3147) --- src/util/edit-distance-inl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/util/edit-distance-inl.h b/src/util/edit-distance-inl.h index c1d1682804c..3304b27d0bf 100644 --- a/src/util/edit-distance-inl.h +++ b/src/util/edit-distance-inl.h @@ -35,8 +35,8 @@ int32 LevenshteinEditDistance(const std::vector &a, // elements a_0 ... a_{M-1} and b_0 ... b_{N-1}. // We are computing the recursion // E(m, n) = min( E(m-1, n-1) + (1-delta(a_{m-1}, b_{n-1})), - // E(m-1, n), - // E(m, n-1) ). + // E(m-1, n) + 1, + // E(m, n-1) + 1). // where E(m, n) is defined for m = 0..M and n = 0..N and out-of- // bounds quantities are considered to be infinity (i.e. the // recursion does not visit them). 
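The corrected comment states the standard Levenshtein recurrence: the diagonal move costs 1 - delta(a_{m-1}, b_{n-1}), i.e. 0 for a match and 1 for a substitution, while insertions and deletions each cost 1, which is what the added "+ 1" terms make explicit. A small self-contained sketch of the same recurrence (not the Kaldi implementation, which operates on integer symbol sequences):

    def levenshtein(a, b):
        # E[m][n] is the edit distance between a[:m] and b[:n].
        M, N = len(a), len(b)
        E = [[0] * (N + 1) for _ in range(M + 1)]
        for m in range(M + 1):
            E[m][0] = m                            # delete all of a[:m]
        for n in range(N + 1):
            E[0][n] = n                            # insert all of b[:n]
        for m in range(1, M + 1):
            for n in range(1, N + 1):
                sub = 0 if a[m - 1] == b[n - 1] else 1
                E[m][n] = min(E[m - 1][n - 1] + sub,   # match / substitution
                              E[m - 1][n] + 1,         # deletion of a[m-1]
                              E[m][n - 1] + 1)         # insertion of b[n-1]
        return E[M][N]

    assert levenshtein("kitten", "sitting") == 3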
From 252690fd0a7fc28f0dff659493f5ef1825b63e65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Danijel=20Kor=C5=BEinek?= Date: Wed, 20 Mar 2019 23:35:37 +0100 Subject: [PATCH 096/235] [src] Add binary that functions as a TCP server (#2938) --- src/doc/online_decoding.dox | 64 +++ src/nnet3/decodable-online-looped.cc | 14 +- src/nnet3/decodable-online-looped.h | 16 + src/online2/online-feature-pipeline.h | 2 +- src/online2/online-nnet2-feature-pipeline.cc | 15 + src/online2/online-nnet2-feature-pipeline.h | 14 + src/online2/online-nnet3-decoding.cc | 7 +- src/online2/online-nnet3-decoding.h | 8 +- src/online2bin/Makefile | 3 +- .../online2-tcp-nnet3-decode-faster.cc | 442 ++++++++++++++++++ 10 files changed, 578 insertions(+), 7 deletions(-) create mode 100644 src/online2bin/online2-tcp-nnet3-decode-faster.cc diff --git a/src/doc/online_decoding.dox b/src/doc/online_decoding.dox index 799bfb5895f..dc04d9bef4e 100644 --- a/src/doc/online_decoding.dox +++ b/src/doc/online_decoding.dox @@ -438,6 +438,70 @@ and downloadable models that can be used with online nnet3 decoding, please see http://kaldi-asr.org/models.html (the first model there, the ASPIRE model, includes instructions in a README file). +\subsection online_decoding_nnet3_tcp TCP server for nnet3 online decoding + +The program to run the TCP sever is online2-tcp-nnet3-decode-faster located in the +~/src/online2bin folder. The usage is as follows: + +\verbatim +online2-tcp-nnet3-decode-faster +\endverbatim + +For example: + +\verbatim +online2-tcp-nnet3-decode-faster model/final.mdl graph/HCLG.fst graph/words.txt 5050 +\endverbatim + +The word symbol table is mandatory (unlike other nnet3 online decoding programs) because +the server outputs word strings. Endpointing is mandatory to make the operation of the +program reasonable. Other, non-standard options include: + - samp-freq - sampling frequency of audio (usually 8000 for telephony and 16000 for other uses) + - chunk-length - length of signal being processed by decoder at each step + - output-period - how often we check for changes in the decoding (ie. output refresh rate, default 1s) + - num-threads-startup - number of threads used when initializing iVector extractor + +The TCP protocol simply takes RAW signal on input (16-bit signed integer +encoding at chosen sampling frequency) and outputs simple text using the following +logic: + - each refresh period (output-freq argument) the current state of decoding is output + - each line is terminated by '\r' + - once an utterance boundary is detected due to endpointing a '\n' char is output + +Each output string (delimited by '\r') should be treated as uncertain and can change +entirely until the utterance delimiter ('\n') is sent. The delimiter chars are chosen +specifically in order to make the output look neat in the terminal. It is possible to +use it with other interfaces and a web demo (HTML/JS AudioAPI+WebSockets) exists. + +To run the program from the terminal you can use one of the following commands. First, +make sure the server is running and accepting connections. 
Using the Aspire models, the +command should look like this: +\verbatim +online2-tcp-nnet3-decode-faster --samp-freq=8000 --frames-per-chunk=20 --extra-left-context-initial=0 + --frame-subsampling-factor=3 --config=model/conf/online.conf --min-active=200 --max-active=7000 + --beam=15.0 --lattice-beam=6.0 --acoustic-scale=1.0 model/final.mdl graph/HCLG.fst graph/words.txt 5050 +\endverbatim + +To send a WAV file into the server, it first needs to be decoded into raw audio, then it can be +sent to the socket: +\verbatim +sox audio.wav -t raw -c 1 -b 16 -r 8k -e signed-integer - | nc -N localhost 5050 +\endverbatim + +It is possible to play audio (almost) simultaneously as decoding. It may require installing the +'pv' program (used to throttle the signal into Kaldi at the same speed as the playback): + +\verbatim +sox audio.wav -t raw -c 1 -b 16 -r 8k -e signed-integer - | \ + tee >(play -t raw -r 8k -e signed-integer -b 16 -c 1 -q -) | \ + pv -L 16000 -q | nc -N localhost 5050 +\endverbatim + +Finally, it is possible to send audio from the microphone directly into the server: + +\verbatim +rec -r 8k -e signed-integer -c 1 -b 16 -t raw -q - | nc -N localhost 5050 +\endverbatim */ diff --git a/src/nnet3/decodable-online-looped.cc b/src/nnet3/decodable-online-looped.cc index 2159575df6c..751438606e8 100644 --- a/src/nnet3/decodable-online-looped.cc +++ b/src/nnet3/decodable-online-looped.cc @@ -30,6 +30,7 @@ DecodableNnetLoopedOnlineBase::DecodableNnetLoopedOnlineBase( num_chunks_computed_(0), current_log_post_subsampled_offset_(-1), info_(info), + frame_offset_(0), input_features_(input_features), ivector_features_(ivector_features), computer_(info_.opts.compute_config, info_.computation, @@ -66,7 +67,7 @@ int32 DecodableNnetLoopedOnlineBase::NumFramesReady() const { if (input_finished) { // if the input has finished,... we'll pad with duplicates of the last frame // as needed to get the required right context. - return (features_ready + sf - 1) / sf; + return (features_ready + sf - 1) / sf - frame_offset_; } else { // note: info_.right_context_ includes both the model context and any // extra_right_context_ (but this @@ -78,7 +79,7 @@ int32 DecodableNnetLoopedOnlineBase::NumFramesReady() const { // doesn't need any attention to rounding because info_.frames_per_chunk // is always a multiple of 'sf' (see 'frames_per_chunk = GetChunksize..." // in decodable-simple-looped.cc). - return num_chunks_ready * info_.frames_per_chunk / sf; + return num_chunks_ready * info_.frames_per_chunk / sf - frame_offset_; } } @@ -105,9 +106,14 @@ bool DecodableNnetLoopedOnlineBase::IsLastFrame( return false; int32 sf = info_.opts.frame_subsampling_factor, num_subsampled_frames_ready = (features_ready + sf - 1) / sf; - return (subsampled_frame == num_subsampled_frames_ready - 1); + return (subsampled_frame + frame_offset_ == num_subsampled_frames_ready - 1); } +void DecodableNnetLoopedOnlineBase::SetFrameOffset(int32 frame_offset) { + KALDI_ASSERT(0 <= frame_offset && + frame_offset <= frame_offset_ + NumFramesReady()); + frame_offset_ = frame_offset; +} void DecodableNnetLoopedOnlineBase::AdvanceChunk() { // Prepare the input data for the next chunk of features. 
@@ -231,6 +237,7 @@ void DecodableNnetLoopedOnlineBase::AdvanceChunk() { BaseFloat DecodableNnetLoopedOnline::LogLikelihood(int32 subsampled_frame, int32 index) { + subsampled_frame += frame_offset_; EnsureFrameIsComputed(subsampled_frame); // note: we index by 'inde return current_log_post_( @@ -241,6 +248,7 @@ BaseFloat DecodableNnetLoopedOnline::LogLikelihood(int32 subsampled_frame, BaseFloat DecodableAmNnetLoopedOnline::LogLikelihood(int32 subsampled_frame, int32 index) { + subsampled_frame += frame_offset_; EnsureFrameIsComputed(subsampled_frame); return current_log_post_( subsampled_frame - current_log_post_subsampled_offset_, diff --git a/src/nnet3/decodable-online-looped.h b/src/nnet3/decodable-online-looped.h index 3041d3c4637..5ed5c0d73a5 100644 --- a/src/nnet3/decodable-online-looped.h +++ b/src/nnet3/decodable-online-looped.h @@ -81,6 +81,17 @@ class DecodableNnetLoopedOnlineBase: public DecodableInterface { return info_.opts.frame_subsampling_factor; } + /// Sets the frame offset value. Frame offset is initialized to 0 when the + /// decodable object is constructed and stays as 0 unless this method is + /// called. This method is useful when we want to reset the decoder state, + /// i.e. call decoder.InitDecoding(), but we want to keep using the same + /// decodable object, e.g. in case of an endpoint. The frame offset affects + /// the behavior of IsLastFrame(), NumFramesReady() and LogLikelihood() + /// methods. + void SetFrameOffset(int32 frame_offset); + + /// Returns the frame offset value. + int32 GetFrameOffset() const { return frame_offset_; } protected: @@ -111,6 +122,11 @@ class DecodableNnetLoopedOnlineBase: public DecodableInterface { const DecodableNnetSimpleLoopedInfo &info_; + // IsLastFrame(), NumFramesReady() and LogLikelihood() methods take into + // account this offset value. We initialize frame_offset_ as 0 and it stays as + // 0 unless SetFrameOffset() method is called. + int32 frame_offset_; + private: // This function does the computation for the next chunk. It will change diff --git a/src/online2/online-feature-pipeline.h b/src/online2/online-feature-pipeline.h index f89cbbbb898..fab1be3cb27 100644 --- a/src/online2/online-feature-pipeline.h +++ b/src/online2/online-feature-pipeline.h @@ -166,7 +166,7 @@ class OnlineFeaturePipeline: public OnlineFeatureInterface { // This is supplied for debug purposes. void GetAsMatrix(Matrix *feats); - + void FreezeCmvn(); // stop it from moving further (do this when you start // using fMLLR). This will crash if NumFramesReady() == 0. 
diff --git a/src/online2/online-nnet2-feature-pipeline.cc b/src/online2/online-nnet2-feature-pipeline.cc index 510c401fba2..c495c9fc8ef 100644 --- a/src/online2/online-nnet2-feature-pipeline.cc +++ b/src/online2/online-nnet2-feature-pipeline.cc @@ -128,6 +128,21 @@ void OnlineNnet2FeaturePipeline::GetFrame(int32 frame, return final_feature_->GetFrame(frame, feat); } +void OnlineNnet2FeaturePipeline::UpdateFrameWeights( + const std::vector > &delta_weights, + int32 frame_offset) { + if (frame_offset == 0) { + IvectorFeature()->UpdateFrameWeights(delta_weights); + } else { + std::vector > offset_delta_weights; + for (size_t i = 0; i < delta_weights.size(); i++) { + offset_delta_weights.push_back(std::make_pair( + delta_weights[i].first + frame_offset, delta_weights[i].second)); + } + IvectorFeature()->UpdateFrameWeights(offset_delta_weights); + } +} + void OnlineNnet2FeaturePipeline::SetAdaptationState( const OnlineIvectorExtractorAdaptationState &adaptation_state) { if (info_.use_ivectors) { diff --git a/src/online2/online-nnet2-feature-pipeline.h b/src/online2/online-nnet2-feature-pipeline.h index e379f7263ec..2e3fbf7bd78 100644 --- a/src/online2/online-nnet2-feature-pipeline.h +++ b/src/online2/online-nnet2-feature-pipeline.h @@ -196,6 +196,20 @@ class OnlineNnet2FeaturePipeline: public OnlineFeatureInterface { virtual int32 NumFramesReady() const; virtual void GetFrame(int32 frame, VectorBase *feat); + /// If you are downweighting silence, you can call + /// OnlineSilenceWeighting::GetDeltaWeights and supply the output to this + /// class using UpdateFrameWeights(). The reason why this call happens + /// outside this class, rather than this class pulling in the data weights, + /// relates to multi-threaded operation and also from not wanting this class + /// to have excessive dependencies. + /// + /// You must either always call this as soon as new data becomes available, + /// ideally just after calling AcceptWaveform(), or never call it for the + /// lifetime of this object. + void UpdateFrameWeights( + const std::vector > &delta_weights, + int32 frame_offset = 0); + /// Set the adaptation state to a particular value, e.g. reflecting previous /// utterances of the same speaker; this will generally be called after /// Copy(). diff --git a/src/online2/online-nnet3-decoding.cc b/src/online2/online-nnet3-decoding.cc index fbe0c2bed7b..1a6e43f1723 100644 --- a/src/online2/online-nnet3-decoding.cc +++ b/src/online2/online-nnet3-decoding.cc @@ -41,6 +41,12 @@ SingleUtteranceNnet3DecoderTpl::SingleUtteranceNnet3DecoderTpl( decoder_.InitDecoding(); } +template +void SingleUtteranceNnet3DecoderTpl::InitDecoding(int32 frame_offset) { + decoder_.InitDecoding(); + decodable_.SetFrameOffset(frame_offset); +} + template void SingleUtteranceNnet3DecoderTpl::AdvanceDecoding() { decoder_.AdvanceDecoding(&decodable_); @@ -56,7 +62,6 @@ int32 SingleUtteranceNnet3DecoderTpl::NumFramesDecoded() const { return decoder_.NumFramesDecoded(); } - template void SingleUtteranceNnet3DecoderTpl::GetLattice(bool end_of_utterance, CompactLattice *clat) const { diff --git a/src/online2/online-nnet3-decoding.h b/src/online2/online-nnet3-decoding.h index 568c0b6a0b3..9adf77fcb56 100644 --- a/src/online2/online-nnet3-decoding.h +++ b/src/online2/online-nnet3-decoding.h @@ -60,7 +60,13 @@ class SingleUtteranceNnet3DecoderTpl { const FST &fst, OnlineNnet2FeaturePipeline *features); - /// advance the decoding as far as we can. + /// Initializes the decoding and sets the frame offset of the underlying + /// decodable object. 
This method is called by the constructor. You can also + /// call this method when you want to reset the decoder state, but want to + /// keep using the same decodable object, e.g. in case of an endpoint. + void InitDecoding(int32 frame_offset = 0); + + /// Advances the decoding as far as we can. void AdvanceDecoding(); /// Finalizes the decoding. Cleans up and prunes remaining tokens, so the diff --git a/src/online2bin/Makefile b/src/online2bin/Makefile index 8792cc5b11a..28c135eb950 100644 --- a/src/online2bin/Makefile +++ b/src/online2bin/Makefile @@ -11,7 +11,8 @@ BINFILES = online2-wav-gmm-latgen-faster apply-cmvn-online \ online2-wav-nnet2-latgen-faster ivector-extract-online2 \ online2-wav-dump-features ivector-randomize \ online2-wav-nnet2-am-compute online2-wav-nnet2-latgen-threaded \ - online2-wav-nnet3-latgen-faster online2-wav-nnet3-latgen-grammar + online2-wav-nnet3-latgen-faster online2-wav-nnet3-latgen-grammar \ + online2-tcp-nnet3-decode-faster OBJFILES = diff --git a/src/online2bin/online2-tcp-nnet3-decode-faster.cc b/src/online2bin/online2-tcp-nnet3-decode-faster.cc new file mode 100644 index 00000000000..46e9cbc05be --- /dev/null +++ b/src/online2bin/online2-tcp-nnet3-decode-faster.cc @@ -0,0 +1,442 @@ +// online2bin/online2-tcp-nnet3-decode-faster.cc + +// Copyright 2014 Johns Hopkins University (author: Daniel Povey) +// 2016 Api.ai (Author: Ilya Platonov) +// 2018 Polish-Japanese Academy of Information Technology (Author: Danijel Korzinek) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
+ +#include "feat/wave-reader.h" +#include "online2/online-nnet3-decoding.h" +#include "online2/online-nnet2-feature-pipeline.h" +#include "online2/onlinebin-util.h" +#include "online2/online-timing.h" +#include "online2/online-endpoint.h" +#include "fstext/fstext-lib.h" +#include "lat/lattice-functions.h" +#include "util/kaldi-thread.h" +#include "nnet3/nnet-utils.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace kaldi { + +class TcpServer { + public: + explicit TcpServer(int read_timeout); + ~TcpServer(); + + bool Listen(int32 port); // start listening on a given port + int32 Accept(); // accept a client and return its descriptor + + bool ReadChunk(size_t len); // get more data and return false if end-of-stream + + Vector GetChunk(); // get the data read by above method + + bool Write(const std::string &msg); // write to accepted client + bool WriteLn(const std::string &msg, const std::string &eol = "\n"); // write line to accepted client + + void Disconnect(); + + private: + struct ::sockaddr_in h_addr_; + int32 server_desc_, client_desc_; + int16 *samp_buf_; + size_t buf_len_, has_read_; + pollfd client_set_[1]; + int read_timeout_; +}; + +std::string LatticeToString(const Lattice &lat, const fst::SymbolTable &word_syms) { + LatticeWeight weight; + std::vector alignment; + std::vector words; + GetLinearSymbolSequence(lat, &alignment, &words, &weight); + + std::ostringstream msg; + for (size_t i = 0; i < words.size(); i++) { + std::string s = word_syms.Find(words[i]); + if (s.empty()) { + KALDI_WARN << "Word-id " << words[i] << " not in symbol table."; + msg << "<#" << std::to_string(i) << "> "; + } else + msg << s << " "; + } + return msg.str(); +} + +std::string LatticeToString(const CompactLattice &clat, const fst::SymbolTable &word_syms) { + if (clat.NumStates() == 0) { + KALDI_WARN << "Empty lattice."; + return ""; + } + CompactLattice best_path_clat; + CompactLatticeShortestPath(clat, &best_path_clat); + + Lattice best_path_lat; + ConvertLattice(best_path_clat, &best_path_lat); + return LatticeToString(best_path_lat, word_syms); +} +} + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + using namespace fst; + + typedef kaldi::int32 int32; + typedef kaldi::int64 int64; + + const char *usage = + "Reads in audio from a network socket and performs online\n" + "decoding with neural nets (nnet3 setup), with iVector-based\n" + "speaker adaptation and endpointing.\n" + "Note: some configuration values and inputs are set via config\n" + "files whose filenames are passed as options\n" + "\n" + "Usage: online2-tcp-nnet3-decode-faster [options] " + " \n"; + + ParseOptions po(usage); + + + // feature_opts includes configuration for the iVector adaptation, + // as well as the basic features. 
+    OnlineNnet2FeaturePipelineConfig feature_opts;
+    nnet3::NnetSimpleLoopedComputationOptions decodable_opts;
+    LatticeFasterDecoderConfig decoder_opts;
+    OnlineEndpointConfig endpoint_opts;
+
+    BaseFloat chunk_length_secs = 0.18;
+    BaseFloat output_period = 1;
+    BaseFloat samp_freq = 16000.0;
+    int port_num = 5050;
+    int read_timeout = 3;
+
+    po.Register("samp-freq", &samp_freq,
+                "Sampling frequency of the input signal (coded as 16-bit slinear).");
+    po.Register("chunk-length", &chunk_length_secs,
+                "Length of the chunk, in seconds, that we process at a time.");
+    po.Register("output-period", &output_period,
+                "How often, in seconds, we check for changes in the output.");
+    po.Register("num-threads-startup", &g_num_threads,
+                "Number of threads used when initializing iVector extractor.");
+    po.Register("read-timeout", &read_timeout,
+                "Number of seconds of timeout for TCP audio data to appear on the stream. Use -1 for blocking.");
+    po.Register("port-num", &port_num,
+                "Port number the server will listen on.");
+
+    feature_opts.Register(&po);
+    decodable_opts.Register(&po);
+    decoder_opts.Register(&po);
+    endpoint_opts.Register(&po);
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 3) {
+      po.PrintUsage();
+      return 1;
+    }
+
+    std::string nnet3_rxfilename = po.GetArg(1),
+        fst_rxfilename = po.GetArg(2),
+        word_syms_filename = po.GetArg(3);
+
+    OnlineNnet2FeaturePipelineInfo feature_info(feature_opts);
+
+    KALDI_VLOG(1) << "Loading AM...";
+
+    TransitionModel trans_model;
+    nnet3::AmNnetSimple am_nnet;
+    {
+      bool binary;
+      Input ki(nnet3_rxfilename, &binary);
+      trans_model.Read(ki.Stream(), binary);
+      am_nnet.Read(ki.Stream(), binary);
+      SetBatchnormTestMode(true, &(am_nnet.GetNnet()));
+      SetDropoutTestMode(true, &(am_nnet.GetNnet()));
+      nnet3::CollapseModel(nnet3::CollapseModelConfig(), &(am_nnet.GetNnet()));
+    }
+
+    // this object contains precomputed stuff that is used by all decodable
+    // objects. It takes a pointer to am_nnet because if it has iVectors it has
+    // to modify the nnet to accept iVectors at intervals.
+ nnet3::DecodableNnetSimpleLoopedInfo decodable_info(decodable_opts, + &am_nnet); + + KALDI_VLOG(1) << "Loading FST..."; + + fst::Fst *decode_fst = ReadFstKaldiGeneric(fst_rxfilename); + + fst::SymbolTable *word_syms = NULL; + if (!word_syms_filename.empty()) + if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename))) + KALDI_ERR << "Could not read symbol table from file " + << word_syms_filename; + + signal(SIGPIPE, SIG_IGN); // ignore SIGPIPE to avoid crashing when socket forcefully disconnected + + TcpServer server(read_timeout); + + server.Listen(port_num); + + while (true) { + + server.Accept(); + + int32 samp_count = 0;// this is used for output refresh rate + size_t chunk_len = static_cast(chunk_length_secs * samp_freq); + int32 check_period = static_cast(samp_freq * output_period); + int32 check_count = check_period; + + int32 frame_offset = 0; + + bool eos = false; + + OnlineNnet2FeaturePipeline feature_pipeline(feature_info); + SingleUtteranceNnet3Decoder decoder(decoder_opts, trans_model, + decodable_info, + *decode_fst, &feature_pipeline); + + while (!eos) { + + decoder.InitDecoding(frame_offset); + OnlineSilenceWeighting silence_weighting( + trans_model, + feature_info.silence_weighting_config, + decodable_opts.frame_subsampling_factor); + std::vector> delta_weights; + + while (true) { + eos = !server.ReadChunk(chunk_len); + + if (eos) { + feature_pipeline.InputFinished(); + decoder.AdvanceDecoding(); + decoder.FinalizeDecoding(); + frame_offset += decoder.NumFramesDecoded(); + if (decoder.NumFramesDecoded() > 0) { + CompactLattice lat; + decoder.GetLattice(true, &lat); + std::string msg = LatticeToString(lat, *word_syms); + server.WriteLn(msg); + } else + server.Write("\n"); + server.Disconnect(); + break; + } + + Vector wave_part = server.GetChunk(); + feature_pipeline.AcceptWaveform(samp_freq, wave_part); + samp_count += chunk_len; + + if (silence_weighting.Active() && + feature_pipeline.IvectorFeature() != NULL) { + silence_weighting.ComputeCurrentTraceback(decoder.Decoder()); + silence_weighting.GetDeltaWeights(feature_pipeline.NumFramesReady(), + &delta_weights); + feature_pipeline.UpdateFrameWeights(delta_weights, + frame_offset * decodable_opts.frame_subsampling_factor); + } + + decoder.AdvanceDecoding(); + + if (samp_count > check_count) { + if (decoder.NumFramesDecoded() > 0) { + Lattice lat; + decoder.GetBestPath(false, &lat); + std::string msg = LatticeToString(lat, *word_syms); + server.WriteLn(msg, "\r"); + } + check_count += check_period; + } + + if (decoder.EndpointDetected(endpoint_opts)) { + decoder.FinalizeDecoding(); + frame_offset += decoder.NumFramesDecoded(); + CompactLattice lat; + decoder.GetLattice(true, &lat); + std::string msg = LatticeToString(lat, *word_syms); + server.WriteLn(msg); + break; + } + } + } + } + } catch (const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} // main() + + +namespace kaldi { +TcpServer::TcpServer(int read_timeout) { + server_desc_ = -1; + client_desc_ = -1; + samp_buf_ = NULL; + buf_len_ = 0; + read_timeout_ = 1000 * read_timeout; +} + +bool TcpServer::Listen(int32 port) { + h_addr_.sin_addr.s_addr = INADDR_ANY; + h_addr_.sin_port = htons(port); + h_addr_.sin_family = AF_INET; + + server_desc_ = socket(AF_INET, SOCK_STREAM, 0); + + if (server_desc_ == -1) { + KALDI_ERR << "Cannot create TCP socket!"; + return false; + } + + int32 flag = 1; + int32 len = sizeof(int32); + if (setsockopt(server_desc_, SOL_SOCKET, SO_REUSEADDR, &flag, len) == -1) { + KALDI_ERR << "Cannot set socket options!"; + 
return false; + } + + if (bind(server_desc_, (struct sockaddr *) &h_addr_, sizeof(h_addr_)) == -1) { + KALDI_ERR << "Cannot bind to port: " << port << " (is it taken?)"; + return false; + } + + if (listen(server_desc_, 1) == -1) { + KALDI_ERR << "Cannot listen on port!"; + return false; + } + + KALDI_LOG << "TcpServer: Listening on port: " << port; + + return true; + +} + +TcpServer::~TcpServer() { + Disconnect(); + if (server_desc_ != -1) + close(server_desc_); + delete[] samp_buf_; +} + +int32 TcpServer::Accept() { + KALDI_LOG << "Waiting for client..."; + + socklen_t len; + + len = sizeof(struct sockaddr); + client_desc_ = accept(server_desc_, (struct sockaddr *) &h_addr_, &len); + + struct sockaddr_storage addr; + char ipstr[20]; + + len = sizeof addr; + getpeername(client_desc_, (struct sockaddr *) &addr, &len); + + struct sockaddr_in *s = (struct sockaddr_in *) &addr; + inet_ntop(AF_INET, &s->sin_addr, ipstr, sizeof ipstr); + + client_set_[0].fd = client_desc_; + client_set_[0].events = POLLIN; + + KALDI_LOG << "Accepted connection from: " << ipstr; + + return client_desc_; +} + +bool TcpServer::ReadChunk(size_t len) { + if (buf_len_ != len) { + buf_len_ = len; + delete[] samp_buf_; + samp_buf_ = new int16[len]; + } + + ssize_t ret; + int poll_ret; + size_t to_read = len; + has_read_ = 0; + while (to_read > 0) { + poll_ret = poll(client_set_, 1, read_timeout_); + if (poll_ret == 0) { + KALDI_WARN << "Socket timeout! Disconnecting..."; + break; + } + if (client_set_[0].revents != POLLIN) { + KALDI_WARN << "Socket error! Disconnecting..."; + break; + } + ret = read(client_desc_, static_cast(samp_buf_ + has_read_), to_read * sizeof(int16)); + if (ret <= 0) { + KALDI_WARN << "Stream over..."; + break; + } + to_read -= ret / sizeof(int16); + has_read_ += ret / sizeof(int16); + } + + return has_read_ > 0; +} + +Vector TcpServer::GetChunk() { + Vector buf; + + buf.Resize(static_cast(has_read_)); + + for (int i = 0; i < has_read_; i++) + buf(i) = static_cast(samp_buf_[i]); + + return buf; +} + +bool TcpServer::Write(const std::string &msg) { + + const char *p = msg.c_str(); + size_t to_write = msg.size(); + size_t wrote = 0; + while (to_write > 0) { + ssize_t ret = write(client_desc_, static_cast(p + wrote), to_write); + if (ret <= 0) + return false; + + to_write -= ret; + wrote += ret; + } + + return true; +} + +bool TcpServer::WriteLn(const std::string &msg, const std::string &eol) { + if (Write(msg)) + return Write(eol); + else return false; +} + +void TcpServer::Disconnect() { + if (client_desc_ != -1) { + close(client_desc_); + client_desc_ = -1; + } +} +} // namespace kaldi \ No newline at end of file From 6134c290fd58680785bdcd6aa1368be045c620eb Mon Sep 17 00:00:00 2001 From: Shujian2015 Date: Wed, 20 Mar 2019 21:37:34 -0400 Subject: [PATCH 097/235] [scripts] Fix bug in comment (#3152) --- egs/wsj/s5/utils/parse_options.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/wsj/s5/utils/parse_options.sh b/egs/wsj/s5/utils/parse_options.sh index 34476fdb37a..335e69e9ac7 100755 --- a/egs/wsj/s5/utils/parse_options.sh +++ b/egs/wsj/s5/utils/parse_options.sh @@ -42,7 +42,7 @@ done ### -### No we process the command line options +### Now we process the command line options ### while true; do [ -z "${1:-}" ] && break; # break if there are no arguments From aead118bb161543c80ea1ce1df758a7e5c0aac45 Mon Sep 17 00:00:00 2001 From: saikiranvalluri <41471921+saikiranvalluri@users.noreply.github.com> Date: Fri, 22 Mar 2019 01:32:49 +0530 Subject: [PATCH 098/235] [scripts] Fix bug 
in steps/segmentation/ali_to_targets.sh (#3155) --- egs/wsj/s5/steps/segmentation/ali_to_targets.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/wsj/s5/steps/segmentation/ali_to_targets.sh b/egs/wsj/s5/steps/segmentation/ali_to_targets.sh index 78c76a8ea01..56d93df3c6b 100644 --- a/egs/wsj/s5/steps/segmentation/ali_to_targets.sh +++ b/egs/wsj/s5/steps/segmentation/ali_to_targets.sh @@ -82,9 +82,9 @@ nj=$(cat $ali_dir/num_jobs) || exit 1 $cmd JOB=1:$nj $dir/log/get_arc_info.JOB.log \ ali-to-phones --ctm-output --frame-shift=1 \ - $srcdir/final.mdl "ark:gunzip -c $ali_dir/lat.JOB.gz |" - \| \ + $srcdir/final.mdl "ark:gunzip -c $ali_dir/ali.JOB.gz |" - \| \ utils/int2sym.pl -f 5 $lang/phones.txt \| \ - awk '{print $1" "int($3)" "int($4)" 1.0 "$5}' \| \ + awk '{print $1" "int($3)" "int($4)" 1.0 "$5}' \> \ $dir/arc_info_sym.JOB.txt || exit 1 # make $dir an absolute pathname. From 213ae52ac45a2e1d304ab0319d431484f989272a Mon Sep 17 00:00:00 2001 From: "kkm (aka Kirill Katsnelson)" Date: Thu, 21 Mar 2019 15:58:16 -0700 Subject: [PATCH 099/235] [scripts] Avoid holding out more data than the requested num-utts (due to utt2uniq) (#3141) --- egs/wsj/s5/steps/nnet3/chain/get_egs.sh | 65 +++++++++++++------------ 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh index 9996820d6d3..ae4a0474a24 100755 --- a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh +++ b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh @@ -151,36 +151,40 @@ mkdir -p $dir/log $dir/info # Get list of validation utterances. frame_shift=$(utils/data/get_frame_shift.sh $data) || exit 1 +if [ -f $data/utt2uniq ]; then + # Must hold out all augmented versions of the same utterance. + echo "$0: File $data/utt2uniq exists, so ensuring the hold-out set" \ + "includes all perturbed versions of the same source utterance." + utils/utt2spk_to_spk2utt.pl $data/utt2uniq 2>/dev/null | + awk -v max_utt=$num_utts_subset '{ + for (n=2;n<=NF;n++) print $n; + printed += NF-1; + if (printed >= max_utt) nextfile; }' | + sort > $dir/valid_uttlist +else + awk '{print $1}' $data/utt2spk | \ + utils/shuffle_list.pl 2>/dev/null | \ + head -$num_utts_subset > $dir/valid_uttlist +fi +len_valid_uttlist=$(wc -l < $dir/valid_uttlist) + awk '{print $1}' $data/utt2spk | \ - utils/shuffle_list.pl 2>/dev/null | head -$num_utts_subset > $dir/valid_uttlist + utils/filter_scp.pl --exclude $dir/valid_uttlist | \ + utils/shuffle_list.pl 2>/dev/null | \ + head -$num_utts_subset > $dir/train_subset_uttlist +len_trainsub_uttlist=$(wc -l <$dir/train_subset_uttlist) -len_uttlist=$(wc -l < $dir/valid_uttlist) -if [ $len_uttlist -lt $num_utts_subset ]; then - echo "Number of utterances is very small. Please check your data." && exit 1; +if [[ $len_valid_uttlist -lt $num_utts_subset || + $len_trainsub_uttlist -lt $num_utts_subset ]]; then + echo "$0: Number of utterances is very small. Please check your data." && exit 1; fi -if [ -f $data/utt2uniq ]; then # this matters if you use data augmentation. - # because of this stage we can again have utts with lengths less than - # frames_per_eg - echo "File $data/utt2uniq exists, so augmenting valid_uttlist to" - echo "include all perturbed versions of the same 'real' utterances." 
- mv $dir/valid_uttlist $dir/valid_uttlist.tmp - utils/utt2spk_to_spk2utt.pl $data/utt2uniq > $dir/uniq2utt - cat $dir/valid_uttlist.tmp | utils/apply_map.pl $data/utt2uniq | \ - sort | uniq | utils/apply_map.pl $dir/uniq2utt | \ - awk '{for(n=1;n<=NF;n++) print $n;}' | sort > $dir/valid_uttlist - rm $dir/uniq2utt $dir/valid_uttlist.tmp -fi +echo "$0: Holding out $len_valid_uttlist utterances in validation set and" \ + "$len_trainsub_uttlist in training diagnostic set, out of total" \ + "$(wc -l < $data/utt2spk)." -echo "$0: creating egs. To ensure they are not deleted later you can do: touch $dir/.nodelete" -awk '{print $1}' $data/utt2spk | \ - utils/filter_scp.pl --exclude $dir/valid_uttlist | \ - utils/shuffle_list.pl 2>/dev/null | head -$num_utts_subset > $dir/train_subset_uttlist -len_uttlist=$(wc -l <$dir/train_subset_uttlist) -if [ $len_uttlist -lt $num_utts_subset ]; then - echo "Number of utterances is very small. Please check your data." && exit 1; -fi +echo "$0: creating egs. To ensure they are not deleted later you can do: touch $dir/.nodelete" ## Set up features. echo "$0: feature type is raw" @@ -342,9 +346,8 @@ if [ $stage -le 2 ]; then $egs_opts --normalization-fst-scale=$normalization_fst_scale \ $trans_mdl_opt $chaindir/normalization.fst \ "$train_subset_feats" ark,s,cs:- "ark:$dir/train_subset_all.cegs" || exit 1 - wait sleep 5 # wait for file system to sync. - echo "... Getting subsets of validation examples for diagnostics and combination." + echo "$0: Getting subsets of validation examples for diagnostics and combination." if $generate_egs_scp; then valid_diagnostic_output="ark,scp:$dir/valid_diagnostic.cegs,$dir/valid_diagnostic.scp" train_diagnostic_output="ark,scp:$dir/train_diagnostic.cegs,$dir/train_diagnostic.scp" @@ -365,7 +368,6 @@ if [ $stage -le 2 ]; then $cmd $dir/log/create_train_subset_diagnostic.log \ nnet3-chain-subset-egs --n=$num_egs_diagnostic ark:$dir/train_subset_all.cegs \ $train_diagnostic_output || exit 1 - wait sleep 5 # wait for file system to sync. if $generate_egs_scp; then cat $dir/valid_combine.cegs $dir/train_combine.cegs | \ @@ -375,7 +377,7 @@ if [ $stage -le 2 ]; then fi for f in $dir/{combine,train_diagnostic,valid_diagnostic}.cegs; do - [ ! -s $f ] && echo "No examples in file $f" && exit 1; + [ ! -s $f ] && echo "$0: No examples in file $f" && exit 1; done rm $dir/valid_all.cegs $dir/train_subset_all.cegs $dir/{train,valid}_combine.cegs ) || touch $dir/.error & @@ -412,7 +414,7 @@ if [ $stage -le 4 ]; then fi if [ -f $dir/.error ]; then - echo "Error detected while creating train/valid egs" && exit 1 + echo "$0: Error detected while creating train/valid egs" && exit 1 fi if [ $stage -le 5 ]; then @@ -485,11 +487,11 @@ fi wait if [ -f $dir/.error ]; then - echo "Error detected while creating train/valid egs" && exit 1 + echo "$0: Error detected while creating train/valid egs" && exit 1 fi if [ $stage -le 6 ]; then - echo "$0: removing temporary archives" + echo "$0: Removing temporary archives, alignments and lattices" ( cd $dir for f in $(ls -l . | grep 'cegs_orig' | awk '{ X=NF-1; Y=NF-2; if ($X == "->") print $Y, $NF; }'); do rm $f; done @@ -501,7 +503,6 @@ if [ $stage -le 6 ]; then # there are some extra soft links that we should delete. 
for f in $dir/cegs.*.*.ark; do rm $f; done fi - echo "$0: removing temporary alignments, lattices and transforms" rm $dir/ali.{ark,scp} 2>/dev/null rm $dir/lat_special.*.{ark,scp} 2>/dev/null fi From 1ac8c922cbf6b2c34756d4b467cfa6067a6dba90 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Sat, 23 Mar 2019 18:57:04 -0700 Subject: [PATCH 100/235] [src,scripts] Add support for two-pass agglomerative clustering. (#3058) --- .../v1/diarization/cluster.sh | 17 +- src/ivector/agglomerative-clustering.cc | 220 +++++++++++++----- src/ivector/agglomerative-clustering.h | 104 ++++++--- src/ivectorbin/agglomerative-cluster.cc | 27 ++- 4 files changed, 279 insertions(+), 89 deletions(-) diff --git a/egs/callhome_diarization/v1/diarization/cluster.sh b/egs/callhome_diarization/v1/diarization/cluster.sh index fa5ead5b6b9..5e5c6e9dbe5 100755 --- a/egs/callhome_diarization/v1/diarization/cluster.sh +++ b/egs/callhome_diarization/v1/diarization/cluster.sh @@ -14,6 +14,8 @@ stage=0 nj=10 cleanup=true threshold=0.5 +max_spk_fraction=1.0 +first_pass_max_utterances=32767 rttm_channel=0 read_costs=false reco2num_spk= @@ -36,6 +38,15 @@ if [ $# != 2 ]; then echo " --threshold # Cluster stopping criterion. Clusters with scores greater" echo " # than this value will be merged until all clusters" echo " # exceed this value." + echo " --max-spk-fraction # Clusters with total fraction of utterances greater than" + echo " # this value will not be merged. This is active only when" + echo " # reco2num-spk is supplied and" + echo " # 1.0 / num-spk <= max-spk-fraction <= 1.0." + echo " --first-pass-max-utterances # If the number of utterances is larger than first-pass-max-utterances," + echo " # then clustering is done in two passes. In the first pass, input points" + echo " # are divided into contiguous subsets of size first-pass-max-utterances" + echo " # and each subset is clustered separately. In the second pass, the first" + echo " # pass clusters are merged into the final set of clusters." echo " --rttm-channel # The value passed into the RTTM channel field. Only affects" echo " # the format of the RTTM file." echo " --read-costs # If true, interpret input scores as costs, i.e. similarity" @@ -78,8 +89,10 @@ if [ $stage -le 0 ]; then echo "$0: clustering scores" $cmd JOB=1:$nj $dir/log/agglomerative_cluster.JOB.log \ agglomerative-cluster --threshold=$threshold --read-costs=$read_costs \ - --reco2num-spk-rspecifier=$reco2num_spk scp:"$feats" \ - ark,t:$sdata/JOB/spk2utt ark,t:$dir/labels.JOB || exit 1; + --reco2num-spk-rspecifier=$reco2num_spk \ + --max-spk-fraction=$max_spk_fraction \ + --first-pass-max-utterances=$first_pass_max_utterances \ + scp:"$feats" ark,t:$sdata/JOB/spk2utt ark,t:$dir/labels.JOB || exit 1; fi if [ $stage -le 1 ]; then diff --git a/src/ivector/agglomerative-clustering.cc b/src/ivector/agglomerative-clustering.cc index 30138e00637..ced912ed195 100644 --- a/src/ivector/agglomerative-clustering.cc +++ b/src/ivector/agglomerative-clustering.cc @@ -2,6 +2,7 @@ // Copyright 2017-2018 Matthew Maciejewski // 2018 David Snyder +// 2019 Dogan Can // See ../../COPYING for clarification regarding multiple authors // @@ -24,65 +25,98 @@ namespace kaldi { void AgglomerativeClusterer::Cluster() { - KALDI_VLOG(2) << "Initializing cluster assignments."; - Initialize(); - - KALDI_VLOG(2) << "Clustering..."; - // This is the main algorithm loop. It moves through the queue merging - // clusters until a stopping criterion has been reached. 
- while (num_clusters_ > min_clust_ && !queue_.empty()) { - std::pair > pr = queue_.top(); - int32 i = (int32) pr.second.first, j = (int32) pr.second.second; - queue_.pop(); - // check to make sure clusters have not already been merged - if ((active_clusters_.find(i) != active_clusters_.end()) && - (active_clusters_.find(j) != active_clusters_.end())) - MergeClusters(i, j); - } + if (num_points_ > first_pass_max_points_) + ClusterTwoPass(); + else + ClusterSinglePass(); +} - std::vector new_assignments(num_points_); - int32 label_id = 0; - std::set::iterator it; - // Iterate through the clusters and assign all utterances within the cluster - // an ID label unique to the cluster. This is the final output and frees up - // the cluster memory accordingly. - for (it = active_clusters_.begin(); it != active_clusters_.end(); ++it) { - ++label_id; - AhcCluster *cluster = clusters_map_[*it]; - std::vector::iterator utt_it; - for (utt_it = cluster->utt_ids.begin(); - utt_it != cluster->utt_ids.end(); ++utt_it) - new_assignments[*utt_it] = label_id; - delete cluster; +void AgglomerativeClusterer::ClusterSinglePass() { + InitializeClusters(0, num_points_); + ComputeClusters(min_clusters_); + AssignClusters(); +} + +void AgglomerativeClusterer::ClusterTwoPass() { + // This is the first pass loop. We divide the input into equal size subsets + // making sure each subset has at most first_pass_max_points_ points. Then, we + // cluster the points in each subset separately until a stopping criterion is + // reached. We set the minimum number of clusters to 10 * min_clusters_ for + // each subset to avoid early merging of most clusters that would otherwise be + // kept separate in single pass clustering. + BaseFloat num_points = static_cast(num_points_); + int32 num_subsets = ceil(num_points / first_pass_max_points_); + int32 subset_size = ceil(num_points / num_subsets); + for (int32 n = 0; n < num_points_; n += subset_size) { + InitializeClusters(n, std::min(n + subset_size, num_points_)); + ComputeClusters(min_clusters_ * 10); + AddClustersToSecondPass(); } - assignments_->swap(new_assignments); + + // We swap the contents of the first and second pass data structures so that + // we can use the same method to do second pass clustering. + clusters_map_.swap(second_pass_clusters_map_); + active_clusters_.swap(second_pass_active_clusters_); + cluster_cost_map_.swap(second_pass_cluster_cost_map_); + queue_.swap(second_pass_queue_); + count_ = second_pass_count_; + + // This is the second pass. It moves through the queue merging clusters + // determined in the first pass until a stopping criterion is reached. 
+ ComputeClusters(min_clusters_); + + AssignClusters(); } -BaseFloat AgglomerativeClusterer::GetCost(int32 i, int32 j) { +uint32 AgglomerativeClusterer::EncodePair(int32 i, int32 j) { if (i < j) - return cluster_cost_map_[std::make_pair(i, j)]; + return (static_cast(i) << 16) + static_cast(j); else - return cluster_cost_map_[std::make_pair(j, i)]; + return (static_cast(j) << 16) + static_cast(i); +} + +std::pair AgglomerativeClusterer::DecodePair(uint32 key) { + return std::make_pair(static_cast(key >> 16), + static_cast(key & 0x0000FFFFu)); } -void AgglomerativeClusterer::Initialize() { - KALDI_ASSERT(num_clusters_ != 0); - for (int32 i = 0; i < num_points_; i++) { +void AgglomerativeClusterer::InitializeClusters(int32 first, int32 last) { + KALDI_ASSERT(last > first); + clusters_map_.clear(); + active_clusters_.clear(); + cluster_cost_map_.clear(); + queue_ = QueueType(); // priority_queue does not have a clear method + + for (int32 i = first; i < last; i++) { // create an initial cluster of size 1 for each point std::vector ids; ids.push_back(i); - AhcCluster *c = new AhcCluster(++count_, -1, -1, ids); - clusters_map_[count_] = c; - active_clusters_.insert(count_); + AhcCluster *c = new AhcCluster(i + 1, -1, -1, ids); + clusters_map_[i + 1] = c; + active_clusters_.insert(i + 1); // propagate the queue with all pairs from the cost matrix - for (int32 j = i+1; j < num_clusters_; j++) { - BaseFloat cost = costs_(i,j); - cluster_cost_map_[std::make_pair(i+1, j+1)] = cost; - if (cost <= thresh_) - queue_.push(std::make_pair(cost, - std::make_pair(static_cast(i+1), - static_cast(j+1)))); + for (int32 j = i + 1; j < last; j++) { + BaseFloat cost = costs_(i, j); + uint32 key = EncodePair(i + 1, j + 1); + cluster_cost_map_[key] = cost; + if (cost <= threshold_) + queue_.push(std::make_pair(cost, key)); + } + } +} + +void AgglomerativeClusterer::ComputeClusters(int32 min_clusters) { + while (active_clusters_.size() > min_clusters && !queue_.empty()) { + std::pair pr = queue_.top(); + int32 i, j; + std::tie(i, j) = DecodePair(pr.second); + queue_.pop(); + // check to make sure clusters have not already been merged + if ((active_clusters_.find(i) != active_clusters_.end()) && + (active_clusters_.find(j) != active_clusters_.end())) { + if (clusters_map_[i]->size + clusters_map_[j]->size <= max_cluster_size_) + MergeClusters(i, j); } } } @@ -105,27 +139,99 @@ void AgglomerativeClusterer::MergeClusters(int32 i, int32 j) { std::set::iterator it; for (it = active_clusters_.begin(); it != active_clusters_.end(); ++it) { // The new cost is the sum of the costs of the new cluster's parents - BaseFloat new_cost = GetCost(*it, i) + GetCost(*it, j); - cluster_cost_map_[std::make_pair(*it, count_)] = new_cost; + BaseFloat new_cost = cluster_cost_map_[EncodePair(*it, i)] + + cluster_cost_map_[EncodePair(*it, j)]; + uint32 new_key = EncodePair(*it, count_); + cluster_cost_map_[new_key] = new_cost; BaseFloat norm = clust1->size * (clusters_map_[*it])->size; - if (new_cost / norm <= thresh_) - queue_.push(std::make_pair(new_cost / norm, - std::make_pair(static_cast(*it), - static_cast(count_)))); + if (new_cost / norm <= threshold_) + queue_.push(std::make_pair(new_cost / norm, new_key)); } active_clusters_.insert(count_); clusters_map_[count_] = clust1; delete clust2; - num_clusters_--; +} + +void AgglomerativeClusterer::AddClustersToSecondPass() { + // This method collects the results of first pass clustering for one subset, + // i.e. 
adds the set of active clusters to the set of second pass active + // clusters and computes the costs for the newly formed cluster pairs. + std::set::iterator it1, it2; + int32 count = second_pass_count_; + for (it1 = active_clusters_.begin(); it1 != active_clusters_.end(); ++it1) { + AhcCluster *clust1 = clusters_map_[*it1]; + second_pass_clusters_map_[++count] = clust1; + + // Compute new cluster pair costs + for (it2 = second_pass_active_clusters_.begin(); + it2 != second_pass_active_clusters_.end(); ++it2) { + AhcCluster *clust2 = second_pass_clusters_map_[*it2]; + uint32 new_key = EncodePair(count, *it2); + + BaseFloat new_cost = 0.0; + std::vector::iterator utt_it1, utt_it2; + for (utt_it1 = clust1->utt_ids.begin(); + utt_it1 != clust1->utt_ids.end(); ++utt_it1) { + for (utt_it2 = clust2->utt_ids.begin(); + utt_it2 != clust2->utt_ids.end(); ++utt_it2) { + new_cost += costs_(*utt_it1, *utt_it2); + } + } + + second_pass_cluster_cost_map_[new_key] = new_cost; + BaseFloat norm = clust1->size * clust2->size; + if (new_cost / norm <= threshold_) + second_pass_queue_.push(std::make_pair(new_cost / norm, new_key)); + } + + // Copy cluster pair costs that were already computed in the first pass + int32 count2 = second_pass_count_; + for (it2 = active_clusters_.begin(); it2 != it1; ++it2) { + uint32 key = EncodePair(*it1, *it2); + BaseFloat cost = cluster_cost_map_[key]; + BaseFloat norm = clust1->size * (clusters_map_[*it2])->size; + uint32 new_key = EncodePair(count, ++count2); + second_pass_cluster_cost_map_[new_key] = cost; + if (cost / norm <= threshold_) + second_pass_queue_.push(std::make_pair(cost / norm, new_key)); + } + } + // We update second_pass_count_ and second_pass_active_clusters_ here since + // above loop assumes they do not change while the loop is running. + while (second_pass_count_ < count) + second_pass_active_clusters_.insert(++second_pass_count_); +} + +void AgglomerativeClusterer::AssignClusters() { + assignments_->resize(num_points_); + int32 label_id = 0; + std::set::iterator it; + // Iterate through the clusters and assign all utterances within the cluster + // an ID label unique to the cluster. This is the final output and frees up + // the cluster memory accordingly. 
+ for (it = active_clusters_.begin(); it != active_clusters_.end(); ++it) { + ++label_id; + AhcCluster *cluster = clusters_map_[*it]; + std::vector::iterator utt_it; + for (utt_it = cluster->utt_ids.begin(); + utt_it != cluster->utt_ids.end(); ++utt_it) + (*assignments_)[*utt_it] = label_id; + delete cluster; + } } void AgglomerativeCluster( const Matrix &costs, - BaseFloat thresh, - int32 min_clust, + BaseFloat threshold, + int32 min_clusters, + int32 first_pass_max_points, + BaseFloat max_cluster_fraction, std::vector *assignments_out) { - KALDI_ASSERT(min_clust >= 0); - AgglomerativeClusterer ac(costs, thresh, min_clust, assignments_out); + KALDI_ASSERT(min_clusters >= 0); + KALDI_ASSERT(max_cluster_fraction >= 1.0 / min_clusters); + AgglomerativeClusterer ac(costs, threshold, min_clusters, + first_pass_max_points, max_cluster_fraction, + assignments_out); ac.Cluster(); } diff --git a/src/ivector/agglomerative-clustering.h b/src/ivector/agglomerative-clustering.h index 310a336f8b5..ffd63a86e29 100644 --- a/src/ivector/agglomerative-clustering.h +++ b/src/ivector/agglomerative-clustering.h @@ -2,6 +2,7 @@ // Copyright 2017-2018 Matthew Maciejewski // 2018 David Snyder +// 2019 Dogan Can // See ../../COPYING for clarification regarding multiple authors // @@ -55,65 +56,108 @@ class AgglomerativeClusterer { public: AgglomerativeClusterer( const Matrix &costs, - BaseFloat thresh, - int32 min_clust, + BaseFloat threshold, + int32 min_clusters, + int32 first_pass_max_points, + BaseFloat max_cluster_fraction, std::vector *assignments_out) - : count_(0), costs_(costs), thresh_(thresh), min_clust_(min_clust), + : costs_(costs), threshold_(threshold), min_clusters_(min_clusters), + first_pass_max_points_(first_pass_max_points), assignments_(assignments_out) { - num_clusters_ = costs.NumRows(); num_points_ = costs.NumRows(); + + // The max_cluster_size_ is a hard limit on the number points in a cluster. + // This is useful for handling degenerate cases where some outlier points + // form their own clusters and force everything else to be clustered + // together, e.g. when min-clusters is provided instead of a threshold. + max_cluster_size_ = ceil(num_points_ * max_cluster_fraction); + + // The count_, which is used for identifying clusters, is initialized to + // num_points_ because cluster IDs 1..num_points_ are reserved for input + // points, which are the initial set of clusters. + count_ = num_points_; + + // The second_pass_count_, which is used for identifying the initial set of + // second pass clusters and initializing count_ before the second pass, is + // initialized to 0 and incremented whenever a new cluster is added to the + // initial set of second pass clusters. + second_pass_count_ = 0; } - // Performs the clustering + // Clusters points. Chooses single pass or two pass algorithm. void Cluster(); + + // Clusters points using single pass algorithm. + void ClusterSinglePass(); + + // Clusters points using two pass algorithm. + void ClusterTwoPass(); + private: - // Returns the cost between clusters with IDs i and j - BaseFloat GetCost(int32 i, int32 j); + // Encodes cluster pair into a 32bit unsigned integer. + uint32 EncodePair(int32 i, int32 j); + // Decodes cluster pair from a 32bit unsigned integer. 
+ std::pair DecodePair(uint32 key); // Initializes the clustering queue with singleton clusters - void Initialize(); + void InitializeClusters(int32 first, int32 last); + // Does hierarchical agglomerative clustering + void ComputeClusters(int32 min_clusters); + // Adds clusters created in first pass to second pass clusters + void AddClustersToSecondPass(); + // Assigns points to clusters + void AssignClusters(); // Merges clusters with IDs i and j and updates cost map and queue void MergeClusters(int32 i, int32 j); - - int32 count_; // Count of clusters that have been created. Also used to give - // clusters unique IDs. const Matrix &costs_; // cost matrix - BaseFloat thresh_; // stopping criterion threshold - int32 min_clust_; // minimum number of clusters + BaseFloat threshold_; // stopping criterion threshold + int32 min_clusters_; // minimum number of clusters + int32 first_pass_max_points_; // maximum number of points in each subset std::vector *assignments_; // assignments out + int32 num_points_; // total number of points to cluster + int32 max_cluster_size_; // maximum number of points in a cluster + int32 count_; // count of first pass clusters, used for identifying clusters + int32 second_pass_count_; // count of second pass clusters + // Priority queue using greater (lowest costs are highest priority). // Elements contain pairs of cluster IDs and their cost. - typedef std::pair > QueueElement; + typedef std::pair QueueElement; typedef std::priority_queue, std::greater > QueueType; - QueueType queue_; + QueueType queue_, second_pass_queue_; // Map from cluster IDs to cost between them - std::unordered_map, BaseFloat, - PairHasher> cluster_cost_map_; + std::unordered_map cluster_cost_map_; // Map from cluster ID to cluster object address std::unordered_map clusters_map_; - std::set active_clusters_; // IDs of unmerged clusters - int32 num_clusters_; // number of active clusters - int32 num_points_; // total number of points to cluster + // Set of unmerged cluster IDs + std::set active_clusters_; + + // Map from second pass cluster IDs to cost between them + std::unordered_map second_pass_cluster_cost_map_; + // Map from second pass cluster ID to cluster object address + std::unordered_map second_pass_clusters_map_; + // Set of unmerged second pass cluster IDs + std::set second_pass_active_clusters_; }; /** This is the function that is called to perform the agglomerative * clustering. It takes the following arguments: * - A matrix of all pairwise costs, with each row/column corresponding * to an utterance ID, and the elements of the matrix containing the - cost for pairing the utterances for its row and column + * cost for pairing the utterances for its row and column * - A threshold which is used as the stopping criterion for the clusters * - A minimum number of clusters that will not be merged past + * - A maximum fraction of points that can be in a cluster * - A vector which will be filled with integer IDs corresponding to each * of the rows/columns of the score matrix. * * The basic algorithm is as follows: * \code - * while (num-clusters > min_clust && smallest-merge-cost <= thresh) - * merge the two clusters with lowest cost. 
+ * while (num-clusters > min-clusters && smallest-merge-cost <= threshold) + * if (size-of-new-cluster <= max-cluster-size) + * merge the two clusters with lowest cost * \endcode * * The cost between two clusters is the average cost of all pairwise @@ -126,11 +170,19 @@ class AgglomerativeClusterer { * costs between clusters I and M and clusters I and N, where * cluster J was formed by merging clusters M and N. * + * If the number of points to cluster is larger than first-pass-max-points, + * then clustering is done in two passes. In the first pass, input points are + * divided into contiguous subsets of size at most first-pass-max-points and + * each subset is clustered separately. In the second pass, the first pass + * clusters are merged into the final set of clusters. + * */ void AgglomerativeCluster( const Matrix &costs, - BaseFloat thresh, - int32 min_clust, + BaseFloat threshold, + int32 min_clusters, + int32 first_pass_max_points, + BaseFloat max_cluster_fraction, std::vector *assignments_out); } // end namespace kaldi. diff --git a/src/ivectorbin/agglomerative-cluster.cc b/src/ivectorbin/agglomerative-cluster.cc index 9dca9bfeb83..4812dd291e1 100644 --- a/src/ivectorbin/agglomerative-cluster.cc +++ b/src/ivectorbin/agglomerative-cluster.cc @@ -2,6 +2,7 @@ // Copyright 2016-2018 David Snyder // 2017-2018 Matthew Maciejewski +// 2019 Dogan Can // See ../../COPYING for clarification regarding multiple authors // @@ -47,8 +48,9 @@ int main(int argc, char *argv[]) { ParseOptions po(usage); std::string reco2num_spk_rspecifier; - BaseFloat threshold = 0.0; + BaseFloat threshold = 0.0, max_spk_fraction = 1.0; bool read_costs = false; + int32 first_pass_max_utterances = std::numeric_limits::max(); po.Register("reco2num-spk-rspecifier", &reco2num_spk_rspecifier, "If supplied, clustering creates exactly this many clusters for each" @@ -58,6 +60,16 @@ int main(int argc, char *argv[]) { po.Register("read-costs", &read_costs, "If true, the first" " argument is interpreted as a matrix of costs rather than a" " similarity matrix."); + po.Register("first-pass-max-utterances", &first_pass_max_utterances, + "If the number of utterances is larger than first-pass-max-utterances," + " then clustering is done in two passes. In the first pass, input points" + " are divided into contiguous subsets of size first-pass-max-utterances" + " and each subset is clustered separately. In the second pass, the first" + " pass clusters are merged into the final set of clusters."); + po.Register("max-spk-fraction", &max_spk_fraction, "Merge clusters if the" + " total fraction of utterances in them is less than this threshold." 
+ " This is active only when reco2num-spk-rspecifier is supplied and" + " 1.0 / num-spk <= max-spk-fraction <= 1.0."); po.Read(argc, argv); @@ -90,10 +102,17 @@ int main(int argc, char *argv[]) { std::vector spk_ids; if (reco2num_spk_rspecifier.size()) { int32 num_speakers = reco2num_spk_reader.Value(reco); - AgglomerativeCluster(costs, - std::numeric_limits::max(), num_speakers, &spk_ids); + if (1.0 / num_speakers <= max_spk_fraction && max_spk_fraction <= 1.0) + AgglomerativeCluster(costs, std::numeric_limits::max(), + num_speakers, first_pass_max_utterances, + max_spk_fraction, &spk_ids); + else + AgglomerativeCluster(costs, std::numeric_limits::max(), + num_speakers, first_pass_max_utterances, + 1.0, &spk_ids); } else { - AgglomerativeCluster(costs, threshold, 1, &spk_ids); + AgglomerativeCluster(costs, threshold, 1, first_pass_max_utterances, + 1.0, &spk_ids); } for (int32 i = 0; i < spk_ids.size(); i++) label_writer.Write(uttlist[i], spk_ids[i]); From 0902c9e02c139cbf41d6d5c944957ee46a1bca6d Mon Sep 17 00:00:00 2001 From: saikiranvalluri <41471921+saikiranvalluri@users.noreply.github.com> Date: Sun, 24 Mar 2019 10:52:19 +0530 Subject: [PATCH 101/235] Update run.sh --- egs/fisher_callhome_spanish/s5_gigaword/run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/run.sh b/egs/fisher_callhome_spanish/s5_gigaword/run.sh index 1ad8f9f1e0b..970a058a07f 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/run.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/run.sh @@ -32,9 +32,9 @@ if [ -f path.sh ]; then . ./path.sh; fi set -eou pipefail if [ $stage -le -1 ]; then -# local/fsp_data_prep.sh $sfisher_speech $sfisher_transcripts + local/fsp_data_prep.sh $sfisher_speech $sfisher_transcripts -# local/callhome_data_prep.sh $callhome_speech $callhome_transcripts + local/callhome_data_prep.sh $callhome_speech $callhome_transcripts # The lexicon is created using the LDC spanish lexicon, the words from the # fisher spanish corpus. Additional (most frequent) words are added from the From c10b0fe6d3a8be4e75fb31477acca179265c2ca4 Mon Sep 17 00:00:00 2001 From: saikiranvalluri Date: Sun, 24 Mar 2019 06:44:49 +0000 Subject: [PATCH 102/235] Apply g2p part added to get extended lexicon --- .../s5_gigaword/local/get_rnnlm_wordlist.py | 16 ++--- .../s5_gigaword/run.sh | 60 +++++++++++-------- egs/wsj/s5/steps/dict/apply_g2p_seq2seq.sh | 7 +-- 3 files changed, 47 insertions(+), 36 deletions(-) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/get_rnnlm_wordlist.py b/egs/fisher_callhome_spanish/s5_gigaword/local/get_rnnlm_wordlist.py index d6ddfbecc14..fc13a7af701 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/get_rnnlm_wordlist.py +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/get_rnnlm_wordlist.py @@ -1,17 +1,18 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- # # 2018 Saikiran Valluri, GoVivace inc. 
import os, sys -if len(sys.argv) < 4: - print( "Usage: python get_rnnlm_wordlist.py ") +if len(sys.argv) < 5: + print( "Usage: python get_rnnlm_wordlist.py ") sys.exit() -lexicon_words = open(sys.argv[1], 'r') -pocolm_words = open(sys.argv[2], 'r') -rnnlm_wordsout = open(sys.argv[3], 'w') +lexicon_words = open(sys.argv[1], 'r', encoding="utf-8") +pocolm_words = open(sys.argv[2], 'r', encoding="utf-8") +rnnlm_wordsout = open(sys.argv[3], 'w', encoding="utf-8") +oov_wordlist = open(sys.argv[4], 'w', encoding="utf-8") line_count=0 lexicon=[] @@ -23,10 +24,11 @@ for line in pocolm_words: if not line.split()[0] in lexicon: + oov_wordlist.write(line.split()[0]+'\n') rnnlm_wordsout.write(line.split()[0] + " " + str(line_count)+'\n') line_count = line_count + 1 lexicon_words.close() pocolm_words.close() rnnlm_wordsout.close() - +oov_wordlist.close() diff --git a/egs/fisher_callhome_spanish/s5_gigaword/run.sh b/egs/fisher_callhome_spanish/s5_gigaword/run.sh index 1ad8f9f1e0b..4abd34096ef 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/run.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/run.sh @@ -6,6 +6,7 @@ stage=-1 lmstage=-2 +train_rnnlm=true addtraintext=true num_words_pocolm=110000 train_sgmm2=false @@ -32,31 +33,23 @@ if [ -f path.sh ]; then . ./path.sh; fi set -eou pipefail if [ $stage -le -1 ]; then -# local/fsp_data_prep.sh $sfisher_speech $sfisher_transcripts + local/fsp_data_prep.sh $sfisher_speech $sfisher_transcripts -# local/callhome_data_prep.sh $callhome_speech $callhome_transcripts + local/callhome_data_prep.sh $callhome_speech $callhome_transcripts # The lexicon is created using the LDC spanish lexicon, the words from the # fisher spanish corpus. Additional (most frequent) words are added from the # ES gigaword corpus to bring the total to 64k words. The ES frequency sorted # wordlist is downloaded if it is not available. local/fsp_prepare_dict.sh $spanish_lexicon + # Let's keep the original dict copy for G2P training + cp -r data/local/dict data/local/dict_orig ( - steps/dict/train_g2p_seq2seq.sh data/local/dict/lexicon.txt exp/g2p || touch exp/g2p/.error + steps/dict/train_g2p_seq2seq.sh data/local/dict_orig/lexicon.txt exp/g2p || touch exp/g2p/.error ) & # Added c,j, v to the non silences phones manually - utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang - - # Make sure that you do not use your test and your dev sets to train the LM - # Some form of cross validation is possible where you decode your dev/set based on an - # LM that is trained on everything but that that conversation - # When in doubt about what your data partitions should be use local/fsp_ideal_data_partitions.pl - # to get the numbers. Depending on your needs, you might have to change the size of - # the splits within that file. The default paritions are based on the Kaldi + Joshua - # requirements which means that I have very large dev and test sets - local/fsp_train_lms.sh $split - local/fsp_create_test_lang.sh + utils/prepare_lang.sh data/local/dict_orig "" data/local/lang_orig data/lang_orig utils/fix_data_dir.sh data/local/data/train_all @@ -79,11 +72,7 @@ if [ $stage -le -1 ]; then local/create_splits.sh $split local/callhome_create_splits.sh $split_callhome - wait # wait till G2P training finishes - if [ -f exp/g2p/.error ]; then - rm exp/g2p/.error || true - echo "Fail to train the G2P model." 
&& exit 1; - fi + fi if [ $stage -le 0 ]; then @@ -103,16 +92,37 @@ fi if [ $stage -le 1 ]; then local/train_pocolm.sh --stage $lmstage --num-words-pocolm $num_words_pocolm "$rnnlm_workdir"/text_lm/ "$rnnlm_workdir"/pocolm - local/get_rnnlm_wordlist.py data/lang/words.txt "$rnnlm_workdir"/pocolm/lm/"$num_words_pocolm"_3.pocolm/words.txt \ - "$rnnlm_workdir"/rnnlm_wordlist -fi - -if [ $stage -le 2 ]; then - local/rnnlm.sh --stage $lmstage --dir "$rnnlm_workdir"/rnnlm --pocolm-dir "$rnnlm_workdir"/pocolm/lm/"$num_words_pocolm"_3.pocolm \ + local/get_rnnlm_wordlist.py data/local/dict/lexicon.txt "$rnnlm_workdir"/pocolm/lm/"$num_words_pocolm"_3.pocolm/words.txt \ + "$rnnlm_workdir"/rnnlm_wordlist "$rnnlm_workdir"/oov_pocolmwords + if $train_rnnlm; then + local/rnnlm.sh --stage $lmstage --dir "$rnnlm_workdir"/rnnlm --pocolm-dir "$rnnlm_workdir"/pocolm/lm/"$num_words_pocolm"_3.pocolm \ --wordslist "$rnnlm_workdir"/rnnlm_wordlist --text-dir "$rnnlm_workdir"/text_lm + fi fi + if [ $stage -le 2 ]; then + wait # wait till G2P training finishes + if [ -f exp/g2p/.error ]; then + rm exp/g2p/.error || true + echo "Fail to train the G2P model." && exit 1; + fi + steps/dict/apply_g2p_seq2seq.sh "$rnnlm_workdir"/oov_pocolmwords exp/g2p "$rnnlm_workdir"/oov_g2p.lex + cat "$rnnlm_workdir"/oov_g2p.lex data/local/dict/lexicon.txt | sort -u > "$rnnlm_workdir"/lexicon_extended.txt + cp "$rnnlm_workdir"/lexicon_extended.txt data/local/dict/lexicon.txt # Replacing original lexicon with extended version. + + utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang + + # Make sure that you do not use your test and your dev sets to train the LM + # Some form of cross validation is possible where you decode your dev/set based on an + # LM that is trained on everything but that that conversation + # When in doubt about what your data partitions should be use local/fsp_ideal_data_partitions.pl + # to get the numbers. Depending on your needs, you might have to change the size of + # the splits within that file. 
The default paritions are based on the Kaldi + Joshua + # requirements which means that I have very large dev and test sets + local/fsp_train_lms.sh $split + local/fsp_create_test_lang.sh + # Now compute CMVN stats for the train, dev and test subsets steps/compute_cmvn_stats.sh data/dev exp/make_mfcc/dev $mfccdir steps/compute_cmvn_stats.sh data/test exp/make_mfcc/test $mfccdir diff --git a/egs/wsj/s5/steps/dict/apply_g2p_seq2seq.sh b/egs/wsj/s5/steps/dict/apply_g2p_seq2seq.sh index 77a08c305dd..e6e316ec6b1 100644 --- a/egs/wsj/s5/steps/dict/apply_g2p_seq2seq.sh +++ b/egs/wsj/s5/steps/dict/apply_g2p_seq2seq.sh @@ -17,10 +17,9 @@ set -u set -e if [ $# != 3 ]; then - echo "Usage: $0 [options] " - echo " where is the training lexicon (one pronunciation per " - echo " word per line, with lines like 'hello h uh l ow') and" - echo " is directory where the models will be stored" + echo "Usage: $0 [options] " + echo " where is the OOV wordlist " + echo " is directory where the models will be stored" exit 1; fi From 3df45aec1d8f8a031eb8665c5c94e6be27e81803 Mon Sep 17 00:00:00 2001 From: saikiranvalluri Date: Sun, 24 Mar 2019 07:49:08 +0000 Subject: [PATCH 103/235] Small fix in run.sh rnnlm_wordlist --- egs/fisher_callhome_spanish/s5_gigaword/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/run.sh b/egs/fisher_callhome_spanish/s5_gigaword/run.sh index 4abd34096ef..9d332cf06de 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/run.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/run.sh @@ -92,7 +92,7 @@ fi if [ $stage -le 1 ]; then local/train_pocolm.sh --stage $lmstage --num-words-pocolm $num_words_pocolm "$rnnlm_workdir"/text_lm/ "$rnnlm_workdir"/pocolm - local/get_rnnlm_wordlist.py data/local/dict/lexicon.txt "$rnnlm_workdir"/pocolm/lm/"$num_words_pocolm"_3.pocolm/words.txt \ + local/get_rnnlm_wordlist.py data/lang_orig/words.txt "$rnnlm_workdir"/pocolm/lm/"$num_words_pocolm"_3.pocolm/words.txt \ "$rnnlm_workdir"/rnnlm_wordlist "$rnnlm_workdir"/oov_pocolmwords if $train_rnnlm; then local/rnnlm.sh --stage $lmstage --dir "$rnnlm_workdir"/rnnlm --pocolm-dir "$rnnlm_workdir"/pocolm/lm/"$num_words_pocolm"_3.pocolm \ From 6bd9dad4d3151df7e13d263132e362a139ee5958 Mon Sep 17 00:00:00 2001 From: "kkm (aka Kirill Katsnelson)" Date: Sun, 24 Mar 2019 08:20:25 -0700 Subject: [PATCH 104/235] [src] Disable unget warning in PeekToken (and other small fix) (#3163) --- src/base/io-funcs.cc | 7 ++----- src/base/io-funcs.h | 11 ++++++++--- src/hmm/hmm-topology.cc | 9 +++++---- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/base/io-funcs.cc b/src/base/io-funcs.cc index ff9c921874e..150f74099be 100644 --- a/src/base/io-funcs.cc +++ b/src/base/io-funcs.cc @@ -179,11 +179,8 @@ int PeekToken(std::istream &is, bool binary) { int ans = is.peek(); if (read_bracket) { if (!is.unget()) { - KALDI_WARN << "Error ungetting '<' in PeekToken"; - // Clear the bad bit. It seems to be possible for this code to be - // reached, and the C++ standard is very vague on whether even a single - // call to unget() should succeed; see - // http://www.cplusplus.com/reference/istream/istream/unget/ + // Clear the bad bit. This code can be (and is in fact) reached, since the + // C++ standard does not guarantee that a call to unget() must succeed. 
is.clear(); } } diff --git a/src/base/io-funcs.h b/src/base/io-funcs.h index 6396967f56b..895f661ecee 100644 --- a/src/base/io-funcs.h +++ b/src/base/io-funcs.h @@ -203,13 +203,18 @@ void WriteToken(std::ostream &os, bool binary, const std::string & token); /// value of the stream. int Peek(std::istream &is, bool binary); -/// ReadToken gets the next token and puts it in str (exception on failure). +/// ReadToken gets the next token and puts it in str (exception on failure). If +/// PeekToken() had been previously called, it is possible that the stream had +/// failed to unget the starting '<' character. In this case ReadToken() returns +/// the token string without the leading '<'. You must be prepared to handle +/// this case. ExpectToken() handles this internally, and is not affected. void ReadToken(std::istream &is, bool binary, std::string *token); /// PeekToken will return the first character of the next token, or -1 if end of /// file. It's the same as Peek(), except if the first character is '<' it will -/// skip over it and will return the next character. It will unget the '<' so -/// the stream is where it was before you did PeekToken(). +/// skip over it and will return the next character. It will attempt to unget +/// the '<' so the stream is where it was before you did PeekToken(), however, +/// this is not guaranteed (see ReadToken()). int PeekToken(std::istream &is, bool binary); /// ExpectToken tries to read in the given token, and throws an exception diff --git a/src/hmm/hmm-topology.cc b/src/hmm/hmm-topology.cc index cf134065dbf..29634ecda0b 100644 --- a/src/hmm/hmm-topology.cc +++ b/src/hmm/hmm-topology.cc @@ -69,7 +69,7 @@ void HmmTopology::Read(std::istream &is, bool binary) { ReadToken(is, binary, &token); while (token != "") { if (token != "") - KALDI_ERR << "Expected or , got instead "< or , got instead " << token; int32 state; ReadBasicType(is, binary, &state); if (state != static_cast(this_entry.size())) @@ -88,7 +88,8 @@ void HmmTopology::Read(std::istream &is, bool binary) { int32 self_loop_pdf_class = kNoPdf; ReadBasicType(is, binary, &forward_pdf_class); ReadToken(is, binary, &token); - KALDI_ASSERT(token == ""); + if (token != "") + KALDI_ERR << "Expected , got instead " << token; ReadBasicType(is, binary, &self_loop_pdf_class); this_entry.push_back(HmmState(forward_pdf_class, self_loop_pdf_class)); ReadToken(is, binary, &token); @@ -102,10 +103,10 @@ void HmmTopology::Read(std::istream &is, bool binary) { this_entry.back().transitions.push_back(std::make_pair(dst_state, trans_prob)); ReadToken(is, binary, &token); } - if(token == "") // TODO: remove this clause after a while. + if (token == "") // TODO: remove this clause after a while. 
KALDI_ERR << "You are trying to read old-format topology with new Kaldi."; if (token != "") - KALDI_ERR << "Reading HmmTopology, unexpected token "<, got instead " << token; ReadToken(is, binary, &token); } int32 my_index = entries_.size(); From 37f4f442f28820b30bdbfa78497ac7d40d8a0ba9 Mon Sep 17 00:00:00 2001 From: Brett Tiplitz <26090422+btiplitz@users.noreply.github.com> Date: Sun, 24 Mar 2019 11:27:50 -0400 Subject: [PATCH 105/235] [build] Add new nvidia tools to windows build (#3159) --- windows/variables.props.dev | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/windows/variables.props.dev b/windows/variables.props.dev index 9fb2457c99c..0810edcd262 100644 --- a/windows/variables.props.dev +++ b/windows/variables.props.dev @@ -8,6 +8,8 @@ C:\Users\Yenda\Downloads\kaldi-svn\tools\OpenBLAS-v0.2.14-Win64-int32 C:\Users\jtrmal\Documents\openfst\ C:\Users\jtrmal\Documents\openfst\build64 + c:\Users\jtrmal\Documents\cub\ + C:\Program FIles\NVIDIA Corporation\NvToolsExt\ @@ -29,5 +31,13 @@ $(MKLDIR) true + + $(CUBDIR) + true + + + $(NVTOOLSDIR) + true + From 77ac79f7058f8f86a0d9a77671d9453aa23170dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Danijel=20Kor=C5=BEinek?= Date: Sun, 24 Mar 2019 18:09:33 +0100 Subject: [PATCH 106/235] [doc] Fix documentation errors and add more docs for tcp-server decoder (#3164) --- src/doc/online_decoding.dox | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/src/doc/online_decoding.dox b/src/doc/online_decoding.dox index dc04d9bef4e..9bcc2575be1 100644 --- a/src/doc/online_decoding.dox +++ b/src/doc/online_decoding.dox @@ -444,22 +444,25 @@ The program to run the TCP server is online2-tcp-nnet3-decode-faster located in the ~/src/online2bin folder. The usage is as follows: \verbatim -online2-tcp-nnet3-decode-faster +online2-tcp-nnet3-decode-faster \endverbatim For example: \verbatim -online2-tcp-nnet3-decode-faster model/final.mdl graph/HCLG.fst graph/words.txt 5050 +online2-tcp-nnet3-decode-faster model/final.mdl graph/HCLG.fst graph/words.txt \endverbatim The word symbol table is mandatory (unlike other nnet3 online decoding programs) because the server outputs word strings. Endpointing is mandatory to make the operation of the program reasonable. Other, non-standard options include: + - port-num - the port the server listens on (by default 5050) - samp-freq - sampling frequency of audio (usually 8000 for telephony and 16000 for other uses) - chunk-length - length of signal being processed by decoder at each step - output-period - how often we check for changes in the decoding (i.e. output refresh rate, default 1s) - num-threads-startup - number of threads used when initializing iVector extractor + - read-timeout - if the program doesn't receive data during this timeout, the server terminates the connection. + Use -1 to disable this feature. A minimal example client is sketched below.
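To make the server description above more concrete, here is a minimal client sketch in C++ using POSIX sockets. It is editorial and not part of the patch or of the Kaldi sources: it assumes the server from the example command is already running on localhost with --port-num=5050, and that audio.raw contains 16-bit signed little-endian PCM at the server's --samp-freq; the host, port and file name are illustrative assumptions. The netcat/sox approach described in the following paragraphs does the same job from the command line.

#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>
#include <cstdio>
#include <cstring>

int main() {
  // Connect to the decoding server (assumed to listen on localhost:5050).
  int sock = socket(AF_INET, SOCK_STREAM, 0);
  if (sock < 0) { std::perror("socket"); return 1; }
  sockaddr_in addr;
  std::memset(&addr, 0, sizeof(addr));
  addr.sin_family = AF_INET;
  addr.sin_port = htons(5050);                        // the server's --port-num
  inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr);
  if (connect(sock, reinterpret_cast<sockaddr*>(&addr), sizeof(addr)) < 0) {
    std::perror("connect"); return 1;
  }
  // Stream raw 16-bit PCM (at the server's --samp-freq) to the socket.
  // A production client should also check for partial sends.
  std::FILE *f = std::fopen("audio.raw", "rb");       // illustrative file name
  if (!f) { std::perror("fopen"); return 1; }
  char buf[4096];
  size_t n;
  while ((n = std::fread(buf, 1, sizeof(buf), f)) > 0)
    send(sock, buf, n, 0);
  std::fclose(f);
  // The client is responsible for ending the session: stop sending, then read
  // the recognized text until the server closes the connection (or until its
  // read-timeout expires).
  shutdown(sock, SHUT_WR);
  ssize_t r;
  while ((r = recv(sock, buf, sizeof(buf), 0)) > 0)
    std::fwrite(buf, 1, static_cast<size_t>(r), stdout);
  close(sock);
  return 0;
}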
The TCP protocol simply takes RAW signal on input (16-bit signed integer encoding at chosen sampling frequency) and outputs simple text using the following @@ -479,9 +482,25 @@ command should look like this: \verbatim online2-tcp-nnet3-decode-faster --samp-freq=8000 --frames-per-chunk=20 --extra-left-context-initial=0 --frame-subsampling-factor=3 --config=model/conf/online.conf --min-active=200 --max-active=7000 - --beam=15.0 --lattice-beam=6.0 --acoustic-scale=1.0 model/final.mdl graph/HCLG.fst graph/words.txt 5050 + --beam=15.0 --lattice-beam=6.0 --acoustic-scale=1.0 --port-num=5050 model/final.mdl graph/HCLG.fst graph/words.txt \endverbatim +Note that in order to make the communication as simple as possible, the server has to accept +any data on input and cannot figure out when the stream is over. It will therefore not +be able to terminate the connection and it is the client's responsibility to disconnect +when it is ready to do so. As a fallback for certain situations, the read-timeout option +was added, which will automatically disconnect if a chosen number of seconds has passed. +Keep in mind that this is not an ideal solution and it's a better idea to design your +client to properly disconnect the connection when necessary. + +For testing purposes, we will use the netcat program. We will also use sox to re-encode the +files properly from any source. Netcat has an issue similar to the one described above +for the server: it cannot always interpret the data and usually it won't automatically +disconnect the TCP connection. To get around this, we will use the '-N' switch, which kills +the connection once streaming of the file is complete, but this can have a small side effect of +not reading the whole output from the Kaldi server if the disconnect comes too fast. Just +keep this in mind if you intend to put any of these programs into a production environment. + To send a WAV file into the server, it first needs to be decoded into raw audio, then it can be sent to the socket: \verbatim From 7e47695e793c113c385398dafb32f92572aec6f7 Mon Sep 17 00:00:00 2001 From: saikiranvalluri Date: Mon, 25 Mar 2019 06:28:24 +0000 Subject: [PATCH 107/235] Added sanity check for Sparrowhawk normalizer in cleanup script --- .../s5_gigaword/local/clean_txt_dir.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/clean_txt_dir.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/clean_txt_dir.sh index 60269c0ab7e..1880b3a90cb 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/clean_txt_dir.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/clean_txt_dir.sh @@ -17,6 +17,12 @@ if [ $# -ne 2 ]; then exit 1; fi +if [ ! -s `which normalizer_main` ] ; then + echo "Sparrowhawk normalizer was not found installed !" + echo "Go to $KALDI_ROOT/tools and execute install_sparrowhawk.sh and try again!"
+ exit 1 +fi + txtdir=$1 textdir=$(realpath $txtdir) outdir=$(realpath $2) @@ -38,7 +44,7 @@ if [ $stage -le 0 ]; then $train_cmd --max_jobs_run 100 JOB=1:$numsplits $outdir/sparrowhawk/log/JOB.log \ local/run_norm.sh \ sparrowhawk_configuration.ascii_proto \ - $SPARROWHAWK_ROOT/language-resources/esp/sparrowhawk/ \ + $SPARROWHAWK_ROOT/language-resources/en/sparrowhawk/ \ $outdir/data \ JOB \ $outdir/sparrowhawk/ From 91a4611bba540c907b223c39b658bc5baca3a80f Mon Sep 17 00:00:00 2001 From: saikiranvalluri Date: Mon, 25 Mar 2019 07:10:49 +0000 Subject: [PATCH 108/235] Data preparation fixes --- .../s5_gigaword/local/chain/run_tdnn_1g.sh | 7 ++++++- .../s5_gigaword/local/fsp_data_prep.sh | 1 + egs/fisher_callhome_spanish/s5_gigaword/run.sh | 8 +++++--- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/chain/run_tdnn_1g.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/chain/run_tdnn_1g.sh index c487f1bd222..08e378cf8c5 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/chain/run_tdnn_1g.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/chain/run_tdnn_1g.sh @@ -27,9 +27,10 @@ nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. affix=1g #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. common_egs_dir= reporting_email= +gigaword_workdir= # LSTM/chain options -train_stage=-10 +train_stage=-20 xent_regularize=0.1 dropout_schedule='0,0@0.20,0.3@0.50,0' @@ -277,6 +278,10 @@ if [ $stage -le 23 ]; then --online-ivector-dir exp/nnet3/ivectors_${data}_hires \ $tree_dir/graph_${lmtype} data/${data}_hires ${dir}/decode_${lmtype}_${data} || exit 1; done + if [ $gigaword_workdir ]; then + bash local/rnnlm/lmrescore_nbest.sh 1.0 data/lang_test $gigaword_workdir/rnnlm data/${data}_hires/ \ + ${dir}/decode_${lmtype}_${data} $dir/decode_gigaword_RNNLM_${lmtype}_${data} || exit 1; + fi bash local/rnnlm/lmrescore_nbest.sh 1.0 data/lang_test $rnnlmdir data/${data}_hires/ \ ${dir}/decode_${lmtype}_${data} $dir/decode_rnnLM_${lmtype}_${data} || exit 1; ) || touch $dir/.error & diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_data_prep.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_data_prep.sh index 11d65da3e95..22b98a6c9db 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_data_prep.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_data_prep.sh @@ -133,6 +133,7 @@ if [ $stage -le 2 ]; then sed 's:::g' | \ sed 's:foreign>::g' | \ + sed 's:\[noise\]:[noise] :g' | \ sed 's:>::g' | \ #How do you handle numbers? grep -v '()' | \ diff --git a/egs/fisher_callhome_spanish/s5_gigaword/run.sh b/egs/fisher_callhome_spanish/s5_gigaword/run.sh index 9d332cf06de..687fcfdf3c1 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/run.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/run.sh @@ -23,7 +23,7 @@ callhome_transcripts=/export/corpora/LDC/LDC96T17 split_callhome=local/splits/split_callhome gigaword_datapath=/export/c03/svalluri/Spanish_gigaword/data -rnnlm_workdir=/export/c03/svalluri/workdir_pocolm_2stage +rnnlm_workdir=workdir_rnnlm_Spanish_08032019 mfccdir=`pwd`/mfcc . 
./cmd.sh @@ -75,6 +75,7 @@ if [ $stage -le -1 ]; then fi + if [ $stage -le 0 ]; then mkdir -p "$rnnlm_workdir"/gigaword_rawtext local/flatten_gigaword/flatten_all_gigaword.sh "$gigaword_datapath" "$rnnlm_workdir"/flattened_gigaword_corpus 24 @@ -90,6 +91,7 @@ if [ $stage -le 0 ]; then fi fi + if [ $stage -le 1 ]; then local/train_pocolm.sh --stage $lmstage --num-words-pocolm $num_words_pocolm "$rnnlm_workdir"/text_lm/ "$rnnlm_workdir"/pocolm local/get_rnnlm_wordlist.py data/lang_orig/words.txt "$rnnlm_workdir"/pocolm/lm/"$num_words_pocolm"_3.pocolm/words.txt \ @@ -108,7 +110,7 @@ if [ $stage -le 2 ]; then echo "Fail to train the G2P model." && exit 1; fi steps/dict/apply_g2p_seq2seq.sh "$rnnlm_workdir"/oov_pocolmwords exp/g2p "$rnnlm_workdir"/oov_g2p.lex - cat "$rnnlm_workdir"/oov_g2p.lex data/local/dict/lexicon.txt | sort -u > "$rnnlm_workdir"/lexicon_extended.txt + cat "$rnnlm_workdir"/oov_g2p.lex/lexicon.lex data/local/dict/lexicon.txt | sort | uniq | sed "/^$/d" > "$rnnlm_workdir"/lexicon_extended.txt cp "$rnnlm_workdir"/lexicon_extended.txt data/local/dict/lexicon.txt # Replacing original lexicon with extended version. utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang @@ -294,6 +296,6 @@ fi wait; if [ $stage -le 6 ]; then - local/chain/run_tdnn_1g.sh || exit 1; + local/chain/run_tdnn_1g.sh --gigaword-workdir $rnnlm_workdir || exit 1; fi exit 0; From 5f45dd17453dc3eb2424b35d78e1ed3eb20a5a2c Mon Sep 17 00:00:00 2001 From: saikiranvalluri Date: Tue, 26 Mar 2019 08:02:39 -0400 Subject: [PATCH 109/235] Cosmetic options for gigaword textclean --- .../s5_gigaword/path.sh | 6 +++-- .../s5_gigaword/run.sh | 23 +++++++++++-------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/path.sh b/egs/fisher_callhome_spanish/s5_gigaword/path.sh index e622e7d5051..2993311fd90 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/path.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/path.sh @@ -7,5 +7,7 @@ export LD_LIBRARY_PATH=/home/dpovey/libs export SPARROWHAWK_ROOT=$KALDI_ROOT/tools/sparrowhawk export PATH=$SPARROWHAWK_ROOT/bin:$PATH -export LC_ALL=C.UTF-8 -export LANG=C.UTF-8 +export LC_ALL=C +export LANG=C + +source ~/anaconda/bin/activate py36 diff --git a/egs/fisher_callhome_spanish/s5_gigaword/run.sh b/egs/fisher_callhome_spanish/s5_gigaword/run.sh index 687fcfdf3c1..e1c43d24902 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/run.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/run.sh @@ -6,7 +6,8 @@ stage=-1 lmstage=-2 -train_rnnlm=true +train_rnnlm=false +start_textcleanup=false addtraintext=true num_words_pocolm=110000 train_sgmm2=false @@ -14,7 +15,7 @@ train_sgmm2=false # call the next line with the directory where the Spanish Fisher data is # (the values below are just an example). 
sfisher_speech=/export/corpora/LDC/LDC2010S01 -sfisher_transcripts=/export/corpora/LDC/LDC2010T04 +sfisher_transcripts=/export/c03/svalluri//LDC2010T04 spanish_lexicon=/export/corpora/LDC/LDC96L16 split=local/splits/split_fisher @@ -44,9 +45,9 @@ if [ $stage -le -1 ]; then local/fsp_prepare_dict.sh $spanish_lexicon # Let's keep the original dict copy for G2P training cp -r data/local/dict data/local/dict_orig - ( - steps/dict/train_g2p_seq2seq.sh data/local/dict_orig/lexicon.txt exp/g2p || touch exp/g2p/.error - ) & +# ( +# steps/dict/train_g2p_seq2seq.sh data/local/dict_orig/lexicon.txt exp/g2p || touch exp/g2p/.error +# ) & # Added c,j, v to the non silences phones manually utils/prepare_lang.sh data/local/dict_orig "" data/local/lang_orig data/lang_orig @@ -75,8 +76,12 @@ if [ $stage -le -1 ]; then fi +if $start_textcleanup; then + echo "WARNING : Starting from cleaning up and normalizing the Gigword text" + echo " This might take few days........... You can opt out this stage " + echo " by setting start_textcleanup=false, and having text_lm ready inside rnnlm_workdir." -if [ $stage -le 0 ]; then + if [ $stage -le 0 ]; then mkdir -p "$rnnlm_workdir"/gigaword_rawtext local/flatten_gigaword/flatten_all_gigaword.sh "$gigaword_datapath" "$rnnlm_workdir"/flattened_gigaword_corpus 24 cat "$rnnlm_workdir"/flattened_gigaword_corpus/*.flat > "$rnnlm_workdir"/gigaword_rawtext/in.txt @@ -89,9 +94,9 @@ if [ $stage -le 0 ]; then if $addtraintext; then cat "$rnnlm_workdir"/text_lm/train.txt >> "$rnnlm_workdir"/text_lm/spanish_gigaword_normalised.txt fi + fi fi - if [ $stage -le 1 ]; then local/train_pocolm.sh --stage $lmstage --num-words-pocolm $num_words_pocolm "$rnnlm_workdir"/text_lm/ "$rnnlm_workdir"/pocolm local/get_rnnlm_wordlist.py data/lang_orig/words.txt "$rnnlm_workdir"/pocolm/lm/"$num_words_pocolm"_3.pocolm/words.txt \ @@ -110,7 +115,7 @@ if [ $stage -le 2 ]; then echo "Fail to train the G2P model." && exit 1; fi steps/dict/apply_g2p_seq2seq.sh "$rnnlm_workdir"/oov_pocolmwords exp/g2p "$rnnlm_workdir"/oov_g2p.lex - cat "$rnnlm_workdir"/oov_g2p.lex/lexicon.lex data/local/dict/lexicon.txt | sort | uniq | sed "/^$/d" > "$rnnlm_workdir"/lexicon_extended.txt + cat "$rnnlm_workdir"/oov_g2p.lex/lexicon.lex data/local/dict/lexicon.txt | sed "/^$/d" |sort | uniq > "$rnnlm_workdir"/lexicon_extended.txt cp "$rnnlm_workdir"/lexicon_extended.txt data/local/dict/lexicon.txt # Replacing original lexicon with extended version. utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang @@ -296,6 +301,6 @@ fi wait; if [ $stage -le 6 ]; then - local/chain/run_tdnn_1g.sh --gigaword-workdir $rnnlm_workdir || exit 1; + local/chain/run_tdnn_1g.sh --stage 9 --gigaword-workdir $rnnlm_workdir || exit 1; fi exit 0; From 27034a274ea76e4de8fda5e2441b0a44305bf8a1 Mon Sep 17 00:00:00 2001 From: Desh Raj Date: Wed, 27 Mar 2019 15:11:55 -0400 Subject: [PATCH 110/235] [scripts] Fix non-randomness in getting utt2uniq, introduced in #3142 (#3175) --- egs/wsj/s5/steps/nnet3/chain/get_egs.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh index ae4a0474a24..0185b9fbaad 100755 --- a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh +++ b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh @@ -155,7 +155,8 @@ if [ -f $data/utt2uniq ]; then # Must hold out all augmented versions of the same utterance. echo "$0: File $data/utt2uniq exists, so ensuring the hold-out set" \ "includes all perturbed versions of the same source utterance." 
- utils/utt2spk_to_spk2utt.pl $data/utt2uniq 2>/dev/null | + utils/utt2spk_to_spk2utt.pl $data/utt2uniq 2>/dev/null | \ + utils/shuffle_list.pl 2>/dev/null | \ awk -v max_utt=$num_utts_subset '{ for (n=2;n<=NF;n++) print $n; printed += NF-1; From f9828e9a2a71f69b72a50369b018b89fc889e1b2 Mon Sep 17 00:00:00 2001 From: Justin Luitjens Date: Wed, 27 Mar 2019 14:38:19 -0600 Subject: [PATCH 111/235] [build] Don't build for Tegra sm_XX versions on x86/ppc and vice versa; allow --cuda-arch overrides to have multiple versions (#3171) --- src/configure | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/src/configure b/src/configure index b21cc48f7ee..1013a3c162e 100755 --- a/src/configure +++ b/src/configure @@ -118,7 +118,7 @@ function rel2abs { } function read_value { - local val=`expr "X$1" : '[^=]*=\(.*\)'`; + local val=`expr "X$*" : '[^=]*=\(.*\)'`; echo $val } @@ -430,14 +430,27 @@ function configure_cuda { fi if [ -z "$CUDA_ARCH" ]; then - case $CUDA_VERSION in - 5_5) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35" ;; - 6_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50" ;; - 7_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_53,code=sm_53" ;; - 8_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_53,code=sm_53 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_62,code=sm_62" ;; - 9_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_53,code=sm_53 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_62,code=sm_62 -gencode arch=compute_70,code=sm_70" ;; - 10_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_53,code=sm_53 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_62,code=sm_62 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_72,code=sm_72 -gencode arch=compute_75,code=sm_75" ;; - *) echo "Unsupported CUDA_VERSION (CUDA_VERSION=$CUDA_VERSION), please report it to Kaldi mailing list, together with 'nvcc -h' or 'ptxas -h' which lists allowed -gencode values..."; exit 1 ;; + case `uname -m` in + x86_64|ppc64le) + case $CUDA_VERSION in + 5_5) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35" ;; + 6_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50" ;; + 7_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52" ;; + 8_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61" ;; + 9_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70" ;; + 10_*) 
CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75" ;; + *) echo "Unsupported CUDA_VERSION (CUDA_VERSION=$CUDA_VERSION), please report it to Kaldi mailing list, together with 'nvcc -h' or 'ptxas -h' which lists allowed -gencode values..."; exit 1 ;; + esac + ;; + aarch64) + case $CUDA_VERSION in + 7_*) CUDA_ARCH="-gencode arch=compute_53,code=sm_53" ;; + 8_*|9_*) CUDA_ARCH="-gencode arch=compute_53,code=sm_53 -gencode arch=compute_62,code=sm_62" ;; + 10_*) CUDA_ARCH="-gencode arch=compute_53,code=sm_53 -gencode arch=compute_62,code=sm_62 -gencode arch=compute_72,code=sm_72" ;; + *) echo "Unsupported CUDA_VERSION (CUDA_VERSION=$CUDA_VERSION), please report it to Kaldi mailing list, together with 'nvcc -h' or 'ptxas -h' which lists allowed -gencode values..."; exit 1 ;; + esac + ;; + *) echo "Unsupported architecture for use of Kaldi with CUDA. Please report it to Kaldi mailing list."; exit 1 ;; esac fi From 419e35c7df393642a0a114a2d9182701fece8d05 Mon Sep 17 00:00:00 2001 From: Ashish Arora Date: Fri, 29 Mar 2019 16:30:53 -0400 Subject: [PATCH 112/235] [egs] Fixes Re encoding to IAM, uw3 recipes (#3012) --- .../s5b/local/nnet3/run_ivector_common.sh | 1 - .../v1/local/unk_arc_post_to_transcription.py | 15 ++++--- egs/madcat_ar/v1/RESULTS | 18 ++++++++ egs/rimes/v1/RESULTS | 45 +++++++++++++++++++ .../v1/local/unk_arc_post_to_transcription.py | 15 ++++--- 5 files changed, 79 insertions(+), 15 deletions(-) create mode 100644 egs/madcat_ar/v1/RESULTS create mode 100644 egs/rimes/v1/RESULTS diff --git a/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh b/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh index f071842dc0b..a03cc5b2fa3 100755 --- a/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh +++ b/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh @@ -138,7 +138,6 @@ if [ $stage -le 5 ]; then # Also extract iVectors for the test data, but in this case we don't need the speed # perturbation (sp). 
for data in ${test_sets}; do - nspk=$(wc -l Date: Sat, 30 Mar 2019 14:05:45 -0600 Subject: [PATCH 113/235] [src] Efficiency improvement and extra checking for cudamarix, RE default stream (#3182) --- src/cudamatrix/cu-block-matrix.cc | 4 ++- src/cudamatrix/cu-device.cc | 2 +- src/cudamatrix/cu-matrix.cc | 48 ++++++++++++++++++++---------- src/cudamatrix/cu-packed-matrix.cc | 10 ++++--- src/cudamatrix/cu-packed-matrix.h | 6 ++-- src/cudamatrix/cu-value.h | 9 ++++-- src/cudamatrix/cu-vector.cc | 31 +++++++++++-------- 7 files changed, 71 insertions(+), 39 deletions(-) diff --git a/src/cudamatrix/cu-block-matrix.cc b/src/cudamatrix/cu-block-matrix.cc index fc8f4b7ce72..e0c64912207 100644 --- a/src/cudamatrix/cu-block-matrix.cc +++ b/src/cudamatrix/cu-block-matrix.cc @@ -140,7 +140,9 @@ void CuBlockMatrix::SetCudaData() { size_t size = NumBlocks() * sizeof(CuBlockMatrixData); cu_data_ = static_cast( CuDevice::Instantiate().Malloc(size)); - CU_SAFE_CALL(cudaMemcpy(cu_data_, &(tmp_cu_data[0]), size, cudaMemcpyHostToDevice)); + CU_SAFE_CALL(cudaMemcpyAsync(cu_data_, &(tmp_cu_data[0]), size, + cudaMemcpyHostToDevice, cudaStreamPerThread)); + CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread)); CuDevice::Instantiate().AccuProfile(__func__, tim); } #endif diff --git a/src/cudamatrix/cu-device.cc b/src/cudamatrix/cu-device.cc index 85c2492c074..e5d161521fd 100644 --- a/src/cudamatrix/cu-device.cc +++ b/src/cudamatrix/cu-device.cc @@ -427,7 +427,7 @@ void CuDevice::AccuProfile(const char *function_name, // per-thread default stream. Since we compile with // -DCUDA_API_PER_THREAD_DEFAULT_STREAM, this equates to a per-thread // stream. - cudaStreamSynchronize(0); + CU_SAFE_CALL(cudaStreamSynchronize(0)); double elapsed = timer.Elapsed(); if (profile_map_.find(key) == profile_map_.end()) profile_map_[key] = elapsed; diff --git a/src/cudamatrix/cu-matrix.cc b/src/cudamatrix/cu-matrix.cc index 1f09ff278ce..efe8dec7652 100644 --- a/src/cudamatrix/cu-matrix.cc +++ b/src/cudamatrix/cu-matrix.cc @@ -324,7 +324,7 @@ void CuMatrixBase::CopyFromMat(const MatrixBase &src, CU_SAFE_CALL(cudaMemcpy2DAsync(data_, dst_pitch, src.Data(), src_pitch, width, src.NumRows(), cudaMemcpyHostToDevice, cudaStreamPerThread)); - cudaStreamSynchronize(cudaStreamPerThread); + CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread)); CuDevice::Instantiate().AccuProfile("CuMatrixBase::CopyFromMat(from CPU)", tim); } else { @@ -431,9 +431,10 @@ void CuMatrixBase::CopyToMat(MatrixBase *dst, MatrixIndexT src_pitch = stride_*sizeof(Real); MatrixIndexT dst_pitch = dst->Stride()*sizeof(Real); MatrixIndexT width = NumCols()*sizeof(Real); - CU_SAFE_CALL(cudaMemcpy2D(dst->Data(), dst_pitch, this->data_, src_pitch, - width, this->num_rows_, cudaMemcpyDeviceToHost)); - + CU_SAFE_CALL(cudaMemcpy2DAsync(dst->Data(), dst_pitch, this->data_, + src_pitch, width, this->num_rows_, + cudaMemcpyDeviceToHost, cudaStreamPerThread)); + CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread)); CuDevice::Instantiate().AccuProfile("CuMatrix::CopyToMatD2H", tim); } } else @@ -1670,7 +1671,10 @@ void CuMatrix::CompObjfAndDeriv(const std::vector >& s return; } void *addr = CuDevice::Instantiate().Malloc(sv_labels.size() * sizeof(MatrixElement)); - CU_SAFE_CALL(cudaMemcpy(addr, sv_labels.data(), sv_labels.size() * sizeof(MatrixElement), cudaMemcpyHostToDevice)); + CU_SAFE_CALL(cudaMemcpyAsync(addr, sv_labels.data(), sv_labels.size() * + sizeof(MatrixElement), + cudaMemcpyHostToDevice, + cudaStreamPerThread)); CuTimer tim; CuVector tmp(2, kUndefined); int 
dimBlock(CU1DBLOCK); @@ -2245,7 +2249,9 @@ void AddMatMatBatched(const Real alpha, std::vector* > &C, host_c_array[i] = C[i]->data_; } - CU_SAFE_CALL(cudaMemcpy(device_abc_array, host_abc_array, 3*size*sizeof(Real*), cudaMemcpyHostToDevice)); + CU_SAFE_CALL(cudaMemcpyAsync(device_abc_array, host_abc_array, + 3*size*sizeof(Real*), cudaMemcpyHostToDevice, + cudaStreamPerThread)); CUBLAS_SAFE_CALL(cublas_gemmBatched(GetCublasHandle(), (transB==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), @@ -2325,15 +2331,21 @@ void CuMatrixBase::CopyRowsFromVec(const VectorBase &v) { if (v.Dim() == num_rows_*num_cols_) { if (stride_ == num_cols_) { const Real* v_data = v.Data(); - cudaMemcpy(data_, v_data, sizeof(Real)*num_rows_*num_cols_, cudaMemcpyHostToDevice); + CU_SAFE_CALL(cudaMemcpyAsync(data_, v_data, + sizeof(Real)*num_rows_*num_cols_, + cudaMemcpyHostToDevice, + cudaStreamPerThread)); } else { const Real *v_data = v.Data(); for (MatrixIndexT r = 0; r < num_rows_; r++) { Real *row_data = RowData(r); - cudaMemcpy(row_data, v_data, sizeof(Real)*num_cols_, cudaMemcpyHostToDevice); + CU_SAFE_CALL(cudaMemcpyAsync(row_data, v_data, sizeof(Real)*num_cols_, + cudaMemcpyHostToDevice, + cudaStreamPerThread)); v_data += num_cols_; } } + CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread)); } else if (v.Dim() == num_cols_) { dim3 dimGrid, dimBlock; GetBlockSizesForSimpleMatrixOperation(NumRows(), NumCols(), @@ -2599,16 +2611,19 @@ void VectorBase::CopyRowsFromMat(const CuMatrixBase &mat) { if (CuDevice::Instantiate().Enabled()) { CuTimer tim; if (mat.Stride() == mat.NumCols()) { - cudaMemcpy(data_, mat.Data(), sizeof(Real)*dim_, cudaMemcpyDeviceToHost); + CU_SAFE_CALL(cudaMemcpyAsync(data_, mat.Data(), sizeof(Real)*dim_, + cudaMemcpyDeviceToHost, cudaStreamPerThread)); } else { // we could definitely do better than the following. 
Real* vec_data = data_; for (MatrixIndexT r = 0; r < mat.NumRows(); r++) { - cudaMemcpy(vec_data, mat.RowData(r), sizeof(Real) * mat.NumCols(), - cudaMemcpyDeviceToHost); + CU_SAFE_CALL(cudaMemcpyAsync(vec_data, mat.RowData(r), + sizeof(Real) * mat.NumCols(), cudaMemcpyDeviceToHost, + cudaStreamPerThread)); vec_data += mat.NumCols(); } } + CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread)); CuDevice::Instantiate().AccuProfile("CuVectorBase::CopyRowsFromMat", tim); } else #endif @@ -3257,9 +3272,9 @@ void CuMatrixBase::AddElements(Real alpha, #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { void *addr = CuDevice::Instantiate().Malloc(input.size() * sizeof(MatrixElement)); - CU_SAFE_CALL(cudaMemcpy(addr, input.data(), - input.size() * sizeof(MatrixElement), - cudaMemcpyHostToDevice)); + CU_SAFE_CALL(cudaMemcpyAsync(addr, input.data(), + input.size() * sizeof(MatrixElement), + cudaMemcpyHostToDevice, cudaStreamPerThread)); CuTimer tim; int dimBlock(CU1DBLOCK); @@ -3289,8 +3304,9 @@ void CuMatrixBase::AddElements(Real alpha, const CuArrayBase &i if (CuDevice::Instantiate().Enabled()) { CuTimer tim; CuVector tmp_vec(indexes.Dim(), kUndefined); - CU_SAFE_CALL(cudaMemcpy(tmp_vec.Data(), input, indexes.Dim() * sizeof(Real), - cudaMemcpyHostToDevice)); + CU_SAFE_CALL(cudaMemcpyAsync(tmp_vec.Data(), input, + indexes.Dim() * sizeof(Real), + cudaMemcpyHostToDevice, cudaStreamPerThread)); int dimBlock(CU1DBLOCK); int dimGrid = n_blocks(indexes.Dim(), CU1DBLOCK); diff --git a/src/cudamatrix/cu-packed-matrix.cc b/src/cudamatrix/cu-packed-matrix.cc index 7581b043ae0..c331920c61f 100644 --- a/src/cudamatrix/cu-packed-matrix.cc +++ b/src/cudamatrix/cu-packed-matrix.cc @@ -162,8 +162,9 @@ void CuPackedMatrix::CopyFromPacked(const PackedMatrix &src) { if (CuDevice::Instantiate().Enabled()) { if (num_rows_ == 0) return; // Nothing to do. 
CuTimer tim; - CU_SAFE_CALL(cudaMemcpy(data_, src.data_, src.SizeInBytes(), - cudaMemcpyHostToDevice)); + CU_SAFE_CALL(cudaMemcpyAsync(data_, src.data_, src.SizeInBytes(), + cudaMemcpyHostToDevice, cudaStreamPerThread)); + CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread)); CuDevice::Instantiate().AccuProfile("CuPackedMatrix::CopyFromPacked2", tim); } else #endif @@ -184,8 +185,9 @@ void CuPackedMatrix::CopyToPacked(PackedMatrix *dst) const { size_t nr = static_cast(num_rows_), num_bytes = ((nr * (nr+1)) / 2) * sizeof(Real); - CU_SAFE_CALL(cudaMemcpy(dst->data_, data_, num_bytes, - cudaMemcpyDeviceToHost)); + CU_SAFE_CALL(cudaMemcpyAsync(dst->data_, data_, num_bytes, + cudaMemcpyDeviceToHost, cudaStreamPerThread)); + CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread)); CuDevice::Instantiate().AccuProfile("CuPackedMatrix::CopyToPackedD2H", tim); } else #endif diff --git a/src/cudamatrix/cu-packed-matrix.h b/src/cudamatrix/cu-packed-matrix.h index 0131ba6c101..8ed7ed79f7b 100644 --- a/src/cudamatrix/cu-packed-matrix.h +++ b/src/cudamatrix/cu-packed-matrix.h @@ -122,8 +122,10 @@ class CuPackedMatrix { #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { Real value; - CU_SAFE_CALL(cudaMemcpy(&value, this->data_ + (r * (r+1)) / 2 + c, - sizeof(Real), cudaMemcpyDeviceToHost)); + CU_SAFE_CALL(cudaMemcpyAsync(&value, this->data_ + (r * (r+1)) / 2 + c, + sizeof(Real), cudaMemcpyDeviceToHost, + cudaStreamPerThread)); + CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread)); return value; } else #endif diff --git a/src/cudamatrix/cu-value.h b/src/cudamatrix/cu-value.h index cab0a3235d7..b5b65479e57 100644 --- a/src/cudamatrix/cu-value.h +++ b/src/cudamatrix/cu-value.h @@ -54,7 +54,9 @@ class CuValue { inline Real operator = (Real r) { // assignment from Real #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { - CU_SAFE_CALL(cudaMemcpy(data_, &r, sizeof(Real), cudaMemcpyHostToDevice)); + CU_SAFE_CALL(cudaMemcpyAsync(data_, &r, sizeof(Real), + cudaMemcpyHostToDevice, cudaStreamPerThread)); + CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread)); return r; } else #endif @@ -71,8 +73,9 @@ class CuValue { #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { Real value; - CU_SAFE_CALL(cudaMemcpy(&value, data_, - sizeof(Real), cudaMemcpyDeviceToHost)); + CU_SAFE_CALL(cudaMemcpyAsync(&value, data_, sizeof(Real), + cudaMemcpyDeviceToHost, cudaStreamPerThread)); + CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread)); return value; } else #endif diff --git a/src/cudamatrix/cu-vector.cc b/src/cudamatrix/cu-vector.cc index 7c968c6550d..2e06cffad48 100644 --- a/src/cudamatrix/cu-vector.cc +++ b/src/cudamatrix/cu-vector.cc @@ -221,18 +221,18 @@ void CuVectorBase::CopyRowsFromMat(const MatrixBase &mat) { if (dim_ == 0) return; CuTimer tim; if (mat.Stride() == mat.NumCols()) { - CU_SAFE_CALL(cudaMemcpy(data_, mat.Data(), sizeof(Real)*dim_, - cudaMemcpyHostToDevice)); + CU_SAFE_CALL(cudaMemcpyAsync(data_, mat.Data(), sizeof(Real)*dim_, + cudaMemcpyHostToDevice, cudaStreamPerThread)); } else { Real* vec_data = data_; for (MatrixIndexT r = 0; r < mat.NumRows(); r++) { - CU_SAFE_CALL(cudaMemcpy(vec_data, mat.RowData(r), + CU_SAFE_CALL(cudaMemcpyAsync(vec_data, mat.RowData(r), sizeof(Real) * mat.NumCols(), - cudaMemcpyHostToDevice)); + cudaMemcpyHostToDevice, cudaStreamPerThread)); vec_data += mat.NumCols(); } } - CU_SAFE_CALL(cudaGetLastError()); + CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread)); CuDevice::Instantiate().AccuProfile(__func__, tim); } else #endif @@ 
-249,18 +249,21 @@ void MatrixBase::CopyRowsFromVec(const CuVectorBase &v) { if (num_rows_ == 0) return; CuTimer tim; if (Stride() == NumCols()) { - CU_SAFE_CALL(cudaMemcpy(data_, v.Data(), + CU_SAFE_CALL(cudaMemcpyAsync(data_, v.Data(), sizeof(Real)*v.Dim(), - cudaMemcpyDeviceToHost)); + cudaMemcpyDeviceToHost, + cudaStreamPerThread)); } else { const Real* vec_data = v.Data(); for (MatrixIndexT r = 0; r < NumRows(); r++) { - CU_SAFE_CALL(cudaMemcpy(RowData(r), vec_data, + CU_SAFE_CALL(cudaMemcpyAsync(RowData(r), vec_data, sizeof(Real) * NumCols(), - cudaMemcpyDeviceToHost)); + cudaMemcpyDeviceToHost, + cudaStreamPerThread)); vec_data += NumCols(); } } + CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread)); CuDevice::Instantiate().AccuProfile(__func__, tim); } else #endif @@ -886,7 +889,9 @@ void CuVectorBase::CopyFromVec(const VectorBase &src) { KALDI_ASSERT(src.Dim() == dim_); if (dim_ == 0) return; CuTimer tim; - CU_SAFE_CALL(cudaMemcpy(data_, src.Data(), src.Dim()*sizeof(Real), cudaMemcpyHostToDevice)); + CU_SAFE_CALL(cudaMemcpyAsync(data_, src.Data(), src.Dim()*sizeof(Real), + cudaMemcpyHostToDevice, cudaStreamPerThread)); + CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread)); CuDevice::Instantiate().AccuProfile("CuVector::CopyFromVecH2D", tim); } } else @@ -917,8 +922,10 @@ void CuVectorBase::CopyToVec(VectorBase *dst) const { } else { if (dim_ == 0) return; CuTimer tim; - CU_SAFE_CALL(cudaMemcpy(dst->Data(), this->data_, - sizeof(Real) * dim_, cudaMemcpyDeviceToHost)); + CU_SAFE_CALL(cudaMemcpyAsync(dst->Data(), this->data_, + sizeof(Real) * dim_, cudaMemcpyDeviceToHost, + cudaStreamPerThread)); + CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread)); CuDevice::Instantiate().AccuProfile(__func__, tim); } } else From abf7a8c197c8251932efe1f9b8cbbf7e8bde8823 Mon Sep 17 00:00:00 2001 From: Shujian2015 Date: Sat, 30 Mar 2019 16:13:17 -0400 Subject: [PATCH 114/235] [egs] Fix small typo in tedlium download script (#3178) --- egs/tedlium/s5_r3/local/download_data.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/tedlium/s5_r3/local/download_data.sh b/egs/tedlium/s5_r3/local/download_data.sh index c51effdd6fa..0b31a258613 100755 --- a/egs/tedlium/s5_r3/local/download_data.sh +++ b/egs/tedlium/s5_r3/local/download_data.sh @@ -25,7 +25,7 @@ else echo "$0: extracting TEDLIUM_release-3 data" tar xf "TEDLIUM_release-3.tgz" else - echo "$0: not downloading or un-tarring TEDLIUM_release2 because it already exists." + echo "$0: not downloading or un-tarring TEDLIUM_release3 because it already exists." 
fi fi From 7691d00dff98085e4459188092ed0765062f3230 Mon Sep 17 00:00:00 2001 From: "kkm (aka Kirill Katsnelson)" Date: Sun, 31 Mar 2019 10:29:00 -0700 Subject: [PATCH 115/235] [github] Add GitHub issue templates (#3187) --- .github/ISSUE_TEMPLATE/bug_report.md | 18 ++++++++++++++++++ .../feature-proposal-discussion.md | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature-proposal-discussion.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000000..660c62884be --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,18 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: bug +assignees: '' + +--- + + diff --git a/.github/ISSUE_TEMPLATE/feature-proposal-discussion.md b/.github/ISSUE_TEMPLATE/feature-proposal-discussion.md new file mode 100644 index 00000000000..61e797b9ca1 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-proposal-discussion.md @@ -0,0 +1,18 @@ +--- +name: Feature proposal or discussion +about: Suggest an idea for Kaldi +title: '' +labels: discussion +assignees: '' + +--- + + From 9ef700fb662223bb49b24baed7b8c9b39ad67a1b Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 31 Mar 2019 17:34:34 -0400 Subject: [PATCH 116/235] [build] Add missing dependency to Makefile (#3191) --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index a49c912c6ed..d31048895b8 100644 --- a/src/Makefile +++ b/src/Makefile @@ -164,7 +164,7 @@ lat: base util hmm tree matrix cudamatrix: base util matrix nnet: base util hmm tree matrix cudamatrix nnet2: base util matrix lat gmm hmm tree transform cudamatrix -nnet3: base util matrix lat gmm hmm tree transform cudamatrix chain fstext +nnet3: base util matrix decoder lat gmm hmm tree transform cudamatrix chain fstext rnnlm: base util matrix cudamatrix nnet3 lm hmm chain: lat hmm tree fstext matrix cudamatrix util base ivector: base util matrix transform tree gmm From 584533433cd3822816a6883b685d42fb2e0d97d1 Mon Sep 17 00:00:00 2001 From: hainan-xv Date: Sun, 31 Mar 2019 23:21:00 -0400 Subject: [PATCH 117/235] [src] Fix bug in pruned lattice rescoring when input lattice has epsilons (#3190) --- src/lat/compose-lattice-pruned.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/lat/compose-lattice-pruned.cc b/src/lat/compose-lattice-pruned.cc index c6e4dafc008..57a7432dca0 100644 --- a/src/lat/compose-lattice-pruned.cc +++ b/src/lat/compose-lattice-pruned.cc @@ -771,7 +771,14 @@ void PrunedCompactLatticeComposer::ProcessTransition(int32 src_composed_state, // Note: we expect that ilabel == olabel, since this is a CompactLattice, but this // may not be so if we extend this to work with Lattice. fst::StdArc lm_arc; - if (!det_fst_->GetArc(src_info->lm_state, olabel, &lm_arc)) { + + // the input lattice might have epsilons + if (olabel == 0) { + lm_arc.ilabel = 0; + lm_arc.olabel = 0; + lm_arc.nextstate = src_info->lm_state; + lm_arc.weight = fst::StdArc::Weight(0.0); + } else if (!det_fst_->GetArc(src_info->lm_state, olabel, &lm_arc)) { // for normal language models we don't expect this to happen, but the // appropriate behavior is to do nothing; the composed arc does not exist, // so there is no arc to add and no new state to create. 
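To summarize the fix in the patch above: an epsilon arc in the input lattice consumes no word, so the composed arc must stay in the same LM state and contribute no LM cost, while any real word label is still looked up with det_fst_->GetArc() as before. The sketch below is an editorial illustration of that rule, not Kaldi code: the real logic lives inside PrunedCompactLatticeComposer::ProcessTransition, whereas here the LM is abstracted as any deterministic-on-demand FST type providing GetArc(state, label, &arc); the function name and template parameter are assumptions.

// Minimal sketch of the epsilon rule: an epsilon on the lattice side passes
// through the LM unchanged (same state, unit weight, i.e. cost 0.0 in the
// tropical semiring); an ordinary word label is looked up in the LM.
#include <fst/fstlib.h>

template <class LmFst>  // any deterministic-on-demand FST with GetArc(state, label, &arc)
bool GetLmArcForLabel(LmFst *lm, fst::StdArc::StateId lm_state,
                      fst::StdArc::Label olabel, fst::StdArc *lm_arc) {
  if (olabel == 0) {                              // epsilon from the input lattice
    lm_arc->ilabel = 0;
    lm_arc->olabel = 0;
    lm_arc->nextstate = lm_state;                 // LM state is unchanged
    lm_arc->weight = fst::StdArc::Weight::One();  // no LM cost contributed
    return true;
  }
  // Ordinary word label: ask the LM; if it has no such arc, the composed arc
  // simply does not exist and the caller adds nothing.
  return lm->GetArc(lm_state, olabel, lm_arc);
}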
From e711d30f7bb77c3c5fa1e766de1896d1559bd3a1 Mon Sep 17 00:00:00 2001 From: saikiranvalluri Date: Mon, 1 Apr 2019 07:16:17 -0400 Subject: [PATCH 118/235] Some fixes in rnnlm training --- .../s5_gigaword/local/chain/run_tdnn_1g.sh | 9 +++++---- egs/fisher_callhome_spanish/s5_gigaword/run.sh | 16 ++++++++++------ 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/chain/run_tdnn_1g.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/chain/run_tdnn_1g.sh index 08e378cf8c5..2f478419a18 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/chain/run_tdnn_1g.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/local/chain/run_tdnn_1g.sh @@ -202,7 +202,7 @@ fi if [ $stage -le 20 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + if [[ $(hostname -f) == *.clsp.joujhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage fi @@ -255,9 +255,10 @@ if [ $stage -le 21 ]; then fi +# Let's train first a small RNNLM on Fisher train set rnnlmdir=exp/rnnlm_lstm_tdnn_1b if [ $stage -le 22 ]; then - local/rnnlm/train_rnnlm.sh --dir $rnnlmdir || exit 1; + rnnlm/train_rnnlm.sh --dir $rnnlmdir || exit 1; fi if [ $stage -le 23 ]; then @@ -279,10 +280,10 @@ if [ $stage -le 23 ]; then $tree_dir/graph_${lmtype} data/${data}_hires ${dir}/decode_${lmtype}_${data} || exit 1; done if [ $gigaword_workdir ]; then - bash local/rnnlm/lmrescore_nbest.sh 1.0 data/lang_test $gigaword_workdir/rnnlm data/${data}_hires/ \ + bash rnnlm/lmrescore_nbest.sh 1.0 data/lang_test $gigaword_workdir/rnnlm data/${data}_hires/ \ ${dir}/decode_${lmtype}_${data} $dir/decode_gigaword_RNNLM_${lmtype}_${data} || exit 1; fi - bash local/rnnlm/lmrescore_nbest.sh 1.0 data/lang_test $rnnlmdir data/${data}_hires/ \ + bash rnnlm/lmrescore_nbest.sh 1.0 data/lang_test $rnnlmdir data/${data}_hires/ \ ${dir}/decode_${lmtype}_${data} $dir/decode_rnnLM_${lmtype}_${data} || exit 1; ) || touch $dir/.error & done diff --git a/egs/fisher_callhome_spanish/s5_gigaword/run.sh b/egs/fisher_callhome_spanish/s5_gigaword/run.sh index e1c43d24902..95425c29034 100755 --- a/egs/fisher_callhome_spanish/s5_gigaword/run.sh +++ b/egs/fisher_callhome_spanish/s5_gigaword/run.sh @@ -7,8 +7,12 @@ stage=-1 lmstage=-2 train_rnnlm=false -start_textcleanup=false -addtraintext=true +start_textcleanup=false # WARNING : IT starts from flattening gigaword corpus to preparing text folder. + # If you already have the normalised gigword text somewhere, you can bypass the + # time consuming text cleanup (~1 week) by setting this option false. +addtraintext=true # If true, this option appends the Fisher train text to the Gigaword corpus textfile, to + # perform the A, A + G, Dev type POCOLM training configuration. 
+ # A=fsp train, G=gigword text, num_words_pocolm=110000 train_sgmm2=false @@ -45,9 +49,9 @@ if [ $stage -le -1 ]; then local/fsp_prepare_dict.sh $spanish_lexicon # Let's keep the original dict copy for G2P training cp -r data/local/dict data/local/dict_orig -# ( -# steps/dict/train_g2p_seq2seq.sh data/local/dict_orig/lexicon.txt exp/g2p || touch exp/g2p/.error -# ) & + ( + steps/dict/train_g2p_seq2seq.sh data/local/dict_orig/lexicon.txt exp/g2p || touch exp/g2p/.error + ) & # Added c,j, v to the non silences phones manually utils/prepare_lang.sh data/local/dict_orig "" data/local/lang_orig data/lang_orig @@ -301,6 +305,6 @@ fi wait; if [ $stage -le 6 ]; then - local/chain/run_tdnn_1g.sh --stage 9 --gigaword-workdir $rnnlm_workdir || exit 1; + local/chain/run_tdnn_1g.sh --stage 0 --gigaword-workdir $rnnlm_workdir || exit 1; fi exit 0; From 8d521c694f0809cfb058568123fc8355406d1b78 Mon Sep 17 00:00:00 2001 From: saikiranvalluri Date: Mon, 1 Apr 2019 07:18:06 -0400 Subject: [PATCH 119/235] Moved s5_gigaword directory to s5 --- egs/fisher_callhome_spanish/s5/RESULTS | 38 ------ egs/fisher_callhome_spanish/s5/cmd.sh | 4 +- .../s5/local/chain/run_tdnn_1g.sh | 16 ++- .../s5/local/clean_abbrevs_text.py | 35 +++++ .../s5/local/clean_txt_dir.sh | 57 +++++++++ egs/fisher_callhome_spanish/s5/local/ctm.sh | 6 +- .../flatten_gigaword/flatten_all_gigaword.sh | 15 +++ .../flatten_gigaword/flatten_one_gigaword.py | 61 +++++++++ .../s5/local/flatten_gigaword/run_flat.sh | 17 +++ .../s5/local/fsp_data_prep.sh | 1 + .../s5/local/fsp_prepare_dict.sh | 5 +- .../s5/local/get_data_weights.pl | 39 ++++++ .../s5/local/get_rnnlm_wordlist.py | 34 +++++ .../s5/local/get_unigram_weights_vocab.py | 33 +++++ .../s5/local/merge_lexicons.py | 7 +- .../s5/local/pocolm_cust.sh | 120 +++++++++++++++++ egs/fisher_callhome_spanish/s5/local/rnnlm.sh | 83 ++++++++++++ .../s5/local/rnnlm/train_rnnlm.sh | 101 --------------- .../s5/local/run_norm.sh | 36 ++++++ .../s5/local/train_pocolm.sh | 54 ++++++++ egs/fisher_callhome_spanish/s5/path.sh | 11 +- egs/fisher_callhome_spanish/s5/run.sh | 121 ++++++++++++------ egs/fisher_callhome_spanish/s5/steps | 2 +- egs/fisher_callhome_spanish/s5/utils | 2 +- 24 files changed, 699 insertions(+), 199 deletions(-) delete mode 100644 egs/fisher_callhome_spanish/s5/RESULTS create mode 100644 egs/fisher_callhome_spanish/s5/local/clean_abbrevs_text.py create mode 100755 egs/fisher_callhome_spanish/s5/local/clean_txt_dir.sh create mode 100755 egs/fisher_callhome_spanish/s5/local/flatten_gigaword/flatten_all_gigaword.sh create mode 100644 egs/fisher_callhome_spanish/s5/local/flatten_gigaword/flatten_one_gigaword.py create mode 100755 egs/fisher_callhome_spanish/s5/local/flatten_gigaword/run_flat.sh create mode 100755 egs/fisher_callhome_spanish/s5/local/get_data_weights.pl create mode 100755 egs/fisher_callhome_spanish/s5/local/get_rnnlm_wordlist.py create mode 100644 egs/fisher_callhome_spanish/s5/local/get_unigram_weights_vocab.py create mode 100755 egs/fisher_callhome_spanish/s5/local/pocolm_cust.sh create mode 100755 egs/fisher_callhome_spanish/s5/local/rnnlm.sh delete mode 100755 egs/fisher_callhome_spanish/s5/local/rnnlm/train_rnnlm.sh create mode 100755 egs/fisher_callhome_spanish/s5/local/run_norm.sh create mode 100755 egs/fisher_callhome_spanish/s5/local/train_pocolm.sh diff --git a/egs/fisher_callhome_spanish/s5/RESULTS b/egs/fisher_callhome_spanish/s5/RESULTS deleted file mode 100644 index 66613163cea..00000000000 --- a/egs/fisher_callhome_spanish/s5/RESULTS +++ /dev/null @@ -1,38 +0,0 @@ 
--------------------------------------------------------------------------------------- -Triphone with mono alignment (small) --------------------------------------------------------------------------------------- -%WER 53.70 [ 21570 / 40170, 2618 ins, 6013 del, 12939 sub ] exp/tri1/decode_dev/wer_14_0.0 - --------------------------------------------------------------------------------------- -Triphone with tri alignments --------------------------------------------------------------------------------------- -%WER 53.18 [ 21364 / 40170, 2889 ins, 5533 del, 12942 sub ] exp/tri2/decode_dev/wer_13_0.0 - --------------------------------------------------------------------------------------- -Triphone + LDA + MLLT --------------------------------------------------------------------------------------- -%WER 46.95 [ 18858 / 40170, 2636 ins, 5197 del, 11025 sub ] exp/tri3a/decode_dev/wer_14_0.0 - --------------------------------------------------------------------------------------- -+ SAT + fMLLR --------------------------------------------------------------------------------------- -%WER 42.86 [ 17217 / 40170, 2556 ins, 4633 del, 10028 sub ] exp/tri4a/decode_dev/wer_15_0.0 - --------------------------------------------------------------------------------------- -+ More leaves and gaussians --------------------------------------------------------------------------------------- -%WER 40.48 [ 16261 / 40170, 2689 ins, 4130 del, 9442 sub ] exp/tri5a/decode_dev/wer_14_0.0 - --------------------------------------------------------------------------------------- -+ bMMI + SGMM --------------------------------------------------------------------------------------- -%WER 38.43 [ 15437 / 40170, 2800 ins, 3685 del, 8952 sub ] exp/sgmm5/decode_dev/wer_10_0.0 -%WER 36.90 [ 14821 / 40170, 2708 ins, 3552 del, 8561 sub ] exp/sgmm5_mmi_b0.1/decode_dev_it1/wer_10_0.0 -%WER 36.09 [ 14499 / 40170, 2511 ins, 3737 del, 8251 sub ] exp/sgmm5_mmi_b0.1/decode_dev_it2/wer_11_0.0 -%WER 35.48 [ 14252 / 40170, 2672 ins, 3370 del, 8210 sub ] exp/sgmm5_mmi_b0.1/decode_dev_it3/wer_10_0.0 -%WER 35.16 [ 14122 / 40170, 2701 ins, 3287 del, 8134 sub ] exp/sgmm5_mmi_b0.1/decode_dev_it4/wer_10_0.0 - --------------------------------------------------------------------------------------- -pNorm-Ensemble DNN --------------------------------------------------------------------------------------- -%WER 35.13 [ 14113 / 40170, 2680 ins, 3405 del, 8028 sub ] exp/tri6a_dnn/decode_dev/wer_11_0.0 diff --git a/egs/fisher_callhome_spanish/s5/cmd.sh b/egs/fisher_callhome_spanish/s5/cmd.sh index 88db78823a5..db97f1fbc6f 100755 --- a/egs/fisher_callhome_spanish/s5/cmd.sh +++ b/egs/fisher_callhome_spanish/s5/cmd.sh @@ -10,6 +10,6 @@ # conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, # or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. -export train_cmd="queue.pl --mem 4G" -export decode_cmd="queue.pl --mem 4G" +export train_cmd="retry.pl queue.pl --mem 8G" +export decode_cmd="retry.pl queue.pl --mem 8G" export mkgraph_cmd="queue.pl --mem 8G" diff --git a/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh b/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh index 7f407552c2e..2f478419a18 100755 --- a/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh +++ b/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh @@ -27,9 +27,10 @@ nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. affix=1g #affix for TDNN+LSTM directory e.g. 
"1a" or "1b", in case we change the configuration. common_egs_dir= reporting_email= +gigaword_workdir= # LSTM/chain options -train_stage=-10 +train_stage=-20 xent_regularize=0.1 dropout_schedule='0,0@0.20,0.3@0.50,0' @@ -156,7 +157,7 @@ if [ $stage -le 19 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) tdnn_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim-continuous=true" tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" @@ -201,7 +202,7 @@ fi if [ $stage -le 20 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + if [[ $(hostname -f) == *.clsp.joujhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage fi @@ -254,9 +255,10 @@ if [ $stage -le 21 ]; then fi +# Let's train first a small RNNLM on Fisher train set rnnlmdir=exp/rnnlm_lstm_tdnn_1b if [ $stage -le 22 ]; then - local/rnnlm/train_rnnlm.sh --dir $rnnlmdir || exit 1; + rnnlm/train_rnnlm.sh --dir $rnnlmdir || exit 1; fi if [ $stage -le 23 ]; then @@ -277,7 +279,11 @@ if [ $stage -le 23 ]; then --online-ivector-dir exp/nnet3/ivectors_${data}_hires \ $tree_dir/graph_${lmtype} data/${data}_hires ${dir}/decode_${lmtype}_${data} || exit 1; done - bash local/rnnlm/lmrescore_nbest.sh 1.0 data/lang_test $rnnlmdir data/${data}_hires/ \ + if [ $gigaword_workdir ]; then + bash rnnlm/lmrescore_nbest.sh 1.0 data/lang_test $gigaword_workdir/rnnlm data/${data}_hires/ \ + ${dir}/decode_${lmtype}_${data} $dir/decode_gigaword_RNNLM_${lmtype}_${data} || exit 1; + fi + bash rnnlm/lmrescore_nbest.sh 1.0 data/lang_test $rnnlmdir data/${data}_hires/ \ ${dir}/decode_${lmtype}_${data} $dir/decode_rnnLM_${lmtype}_${data} || exit 1; ) || touch $dir/.error & done diff --git a/egs/fisher_callhome_spanish/s5/local/clean_abbrevs_text.py b/egs/fisher_callhome_spanish/s5/local/clean_abbrevs_text.py new file mode 100644 index 00000000000..7d92eb9fe3a --- /dev/null +++ b/egs/fisher_callhome_spanish/s5/local/clean_abbrevs_text.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# 2018 Saikiran Valluri, GoVivace inc., + +import os, sys +import re +import codecs + +if len(sys.argv) < 3: + print("Usage : python clean_abbrevs_text.py ") + print(" Processes the text before text normalisation to convert uppercase words as space separated letters") + sys.exit() + +inputfile=codecs.open(sys.argv[1], encoding='utf-8') +outputfile=codecs.open(sys.argv[2], encoding='utf-8', mode='w') + +for line in inputfile: + words = line.split() + textout = "" + wordcnt = 0 + for word in words: + if re.match(r"\b([A-ZÂÁÀÄÊÉÈËÏÍÎÖÓÔÖÚÙÛÑÇ])+[']?s?\b", word): + if wordcnt > 0: + word = re.sub('\'?s', 's', word) + textout = textout + " ".join(word) + " " + else: + textout = textout + word + " " + else: + textout = textout + word + " " + if word.isalpha(): wordcnt = wordcnt + 1 + outputfile.write(textout.strip()+ '\n') + +inputfile.close() +outputfile.close() diff --git a/egs/fisher_callhome_spanish/s5/local/clean_txt_dir.sh b/egs/fisher_callhome_spanish/s5/local/clean_txt_dir.sh new file mode 100755 index 00000000000..1880b3a90cb --- /dev/null +++ 
b/egs/fisher_callhome_spanish/s5/local/clean_txt_dir.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +# Script to clean up gigaword LM text +# Removes punctuations, does case normalization + +stage=0 +nj=500 + +. ./path.sh +. ./cmd.sh +. ./utils/parse_options.sh + +set -euo pipefail + +if [ $# -ne 2 ]; then + echo "Usage: $0 " + exit 1; +fi + +if [ ! -s `which normalizer_main` ] ; then + echo "Sparrowhawk normalizer was not found installed !" + echo "Go to $KALDI_ROOT/tools and execute install_sparrowhawk.sh and try again!" + exit 1 +fi + +txtdir=$1 +textdir=$(realpath $txtdir) +outdir=$(realpath $2) + +workdir=$outdir/tmp +if [ $stage -le 0 ]; then + rm -rf $outdir + mkdir -p $workdir + mkdir -p $textdir/splits + mkdir -p $outdir/data + split -l 1000000 $textdir/in.txt $textdir/splits/out + numsplits=0 + for x in $textdir/splits/*; do + numsplits=$((numsplits+1)) + ln -s $x $outdir/data/$numsplits + done + echo $numsplits + cp $SPARROWHAWK_ROOT/documentation/grammars/sentence_boundary_exceptions.txt . + $train_cmd --max_jobs_run 100 JOB=1:$numsplits $outdir/sparrowhawk/log/JOB.log \ + local/run_norm.sh \ + sparrowhawk_configuration.ascii_proto \ + $SPARROWHAWK_ROOT/language-resources/en/sparrowhawk/ \ + $outdir/data \ + JOB \ + $outdir/sparrowhawk/ + cat $outdir/sparrowhawk/*.txt | sed "/^$/d" > $outdir/text_normalized + + # check if numbers are there in normalized output + awk '{for(i=1;i<=NF;i++) {if (!seen[$i]) {print $i; seen[$i]=1} }}' \ + $outdir/text_normalized > $outdir/unique_words + grep "[0-9]" $outdir/unique_words | sort -u > $outdir/numbers +fi diff --git a/egs/fisher_callhome_spanish/s5/local/ctm.sh b/egs/fisher_callhome_spanish/s5/local/ctm.sh index 62860a10b7b..7d09f574580 100755 --- a/egs/fisher_callhome_spanish/s5/local/ctm.sh +++ b/egs/fisher_callhome_spanish/s5/local/ctm.sh @@ -19,9 +19,9 @@ fi steps/get_ctm.sh $data_dir $lang_dir $decode_dir # Make sure that channel markers match -#perl -i -pe "s:\s.*_fsp-([AB]): \1:g" data/dev/stm -#ls exp/tri5a/decode_dev/score_*/dev.ctm | xargs -I {} perl -i -pe 's:fsp\s1\s:fsp A :g' {} -#ls exp/tri5a/decode_dev/score_*/dev.ctm | xargs -I {} perl -i -pe 's:fsp\s2\s:fsp B :g' {} +#sed -i "s:\s.*_fsp-([AB]): \1:g" data/dev/stm +#ls exp/tri5a/decode_dev/score_*/dev.ctm | xargs -I {} sed -i -r 's:fsp\s1\s:fsp A :g' {} +#ls exp/tri5a/decode_dev/score_*/dev.ctm | xargs -I {} sed -i -r 's:fsp\s2\s:fsp B :g' {} # Get the environment variables . /export/babel/data/software/env.sh diff --git a/egs/fisher_callhome_spanish/s5/local/flatten_gigaword/flatten_all_gigaword.sh b/egs/fisher_callhome_spanish/s5/local/flatten_gigaword/flatten_all_gigaword.sh new file mode 100755 index 00000000000..242359e7c28 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5/local/flatten_gigaword/flatten_all_gigaword.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +set -e + +# Path to Gigaword corpus with all data files decompressed. +export GIGAWORDDIR=$1 +# The directory to write output to +export OUTPUTDIR=$2 +# The number of jobs to run at once +export NUMJOBS=$3 + +echo "Flattening Gigaword with ${NUMJOBS} processes..." +mkdir -p $OUTPUTDIR +find ${GIGAWORDDIR}/data/*/* -type f -print -exec local/flatten_gigaword/run_flat.sh {} ${OUTPUTDIR} \; +echo "Combining the flattened files into one..." 
+cat ${OUTPUTDIR}/*.flat > ${OUTPUTDIR}/flattened_gigaword.txt diff --git a/egs/fisher_callhome_spanish/s5/local/flatten_gigaword/flatten_one_gigaword.py b/egs/fisher_callhome_spanish/s5/local/flatten_gigaword/flatten_one_gigaword.py new file mode 100644 index 00000000000..29f6766dd84 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5/local/flatten_gigaword/flatten_one_gigaword.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- + +import logging +import os +import re +import spacy +import gzip + +from argparse import ArgumentParser +from bs4 import BeautifulSoup + +en_nlp = spacy.load("es") + + +def flatten_one_gigaword_file(file_path): + f = gzip.open(file_path) + html = f.read() + # Parse the text with BeautifulSoup + soup = BeautifulSoup(html, "html.parser") + + # Iterate over all

items and get the text for each. + all_paragraphs = [] + for paragraph in soup("p"): + # Turn inter-paragraph newlines into spaces + paragraph = paragraph.get_text() + paragraph = re.sub(r"\n+", "\n", paragraph) + paragraph = paragraph.replace("\n", " ") + # Tokenize the paragraph into words + tokens = en_nlp.tokenizer(paragraph) + words = [str(token) for token in tokens if not + str(token).isspace()] + if len(words) < 3: + continue + all_paragraphs.append(words) + # Return a list of strings, where each string is a + # space-tokenized paragraph. + return [" ".join(paragraph) for paragraph in all_paragraphs] + + +if __name__ == "__main__": + log_fmt = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + logging.basicConfig(level=logging.INFO, format=log_fmt) + logger = logging.getLogger(__name__) + + parser = ArgumentParser(description=("Flatten a gigaword data file for " + "use in language modeling.")) + parser.add_argument("--gigaword-path", required=True, + metavar="", type=str, + help=("Path to Gigaword directory, with " + "all .gz files unzipped.")) + parser.add_argument("--output-dir", required=True, metavar="", + type=str, help=("Directory to write final flattened " + "Gigaword file.")) + + A = parser.parse_args() + all_paragraphs = flatten_one_gigaword_file(A.gigaword_path) + output_path = os.path.join(A.output_dir, + os.path.basename(A.gigaword_path) + ".flat") + with open(output_path, "w") as output_file: + for paragraph in all_paragraphs: + output_file.write("{}\n".format(paragraph)) diff --git a/egs/fisher_callhome_spanish/s5/local/flatten_gigaword/run_flat.sh b/egs/fisher_callhome_spanish/s5/local/flatten_gigaword/run_flat.sh new file mode 100755 index 00000000000..6b236be0ab9 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5/local/flatten_gigaword/run_flat.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +set -e + +. ./path_venv.sh + +# Path to Gigaword corpus with all data files decompressed. +GIGAWORDPATH=$1 +# The directory to write output to +OUTPUTDIR=$2 +file=$(basename ${GIGAWORDPATH}) +if [ ! -e ${OUTPUTDIR}/${file}.flat ]; then + echo "flattening to ${OUTPUTDIR}/${file}.flat" + python local/flatten_gigaword/flatten_one_gigaword.py --gigaword-path ${GIGAWORDPATH} --output-dir ${OUTPUTDIR} +else + echo "skipping ${file}.flat" +fi + diff --git a/egs/fisher_callhome_spanish/s5/local/fsp_data_prep.sh b/egs/fisher_callhome_spanish/s5/local/fsp_data_prep.sh index 11d65da3e95..22b98a6c9db 100755 --- a/egs/fisher_callhome_spanish/s5/local/fsp_data_prep.sh +++ b/egs/fisher_callhome_spanish/s5/local/fsp_data_prep.sh @@ -133,6 +133,7 @@ if [ $stage -le 2 ]; then sed 's:::g' | \ sed 's:foreign>::g' | \ + sed 's:\[noise\]:[noise] :g' | \ sed 's:>::g' | \ #How do you handle numbers? 
grep -v '()' | \ diff --git a/egs/fisher_callhome_spanish/s5/local/fsp_prepare_dict.sh b/egs/fisher_callhome_spanish/s5/local/fsp_prepare_dict.sh index 779298305c4..7b2de2db392 100755 --- a/egs/fisher_callhome_spanish/s5/local/fsp_prepare_dict.sh +++ b/egs/fisher_callhome_spanish/s5/local/fsp_prepare_dict.sh @@ -105,8 +105,9 @@ if [ $stage -le 4 ]; then cp "$tmpdir/lexicon.1" "$tmpdir/lexicon.2" # Add prons for laughter, noise, oov - w=$(grep -v sil $dir/silence_phones.txt | tr '\n' '|') - perl -i -ne "print unless /\[(${w%?})\]/" $tmpdir/lexicon.2 + for w in `grep -v sil $dir/silence_phones.txt`; do + sed -i "/\[$w\]/d" $tmpdir/lexicon.2 + done for w in `grep -v sil $dir/silence_phones.txt`; do echo "[$w] $w" diff --git a/egs/fisher_callhome_spanish/s5/local/get_data_weights.pl b/egs/fisher_callhome_spanish/s5/local/get_data_weights.pl new file mode 100755 index 00000000000..ca5b2a46f8e --- /dev/null +++ b/egs/fisher_callhome_spanish/s5/local/get_data_weights.pl @@ -0,0 +1,39 @@ +#!/usr/bin/env perl + +# Nagendra Kumar Goel + +# This takes two arguments: +# 1) Pocolm training output folder +# 2) rnnlm weights file name (for output) + +use POSIX; +use List::Util qw[min max]; + +if (@ARGV != 2) { + die "Usage: get_data_weights.pl \n"; +} + +$pdir = shift @ARGV; +$out = shift @ARGV; + +open(P, "<$pdir/metaparameters") || die "Could not open $pdir/metaparameters"; +open(N, "<$pdir/names") || die "Could not open $pdir/names" ; +open(O, ">$out") || die "Could not open $out for writing" ; + +my %scores = (); + +while() { + @n = split(/\s/,$_); + $name = $n[1]; + $w =
<P>
; + @w = split(/\s/,$w); + $weight = $w[1]; + $scores{$name} = $weight; +} + +$min = min(values %scores); + +for(keys %scores) { + $weightout = POSIX::ceil($scores{$_} / $min); + print O "$_\t1\t$weightout\n"; +} diff --git a/egs/fisher_callhome_spanish/s5/local/get_rnnlm_wordlist.py b/egs/fisher_callhome_spanish/s5/local/get_rnnlm_wordlist.py new file mode 100755 index 00000000000..fc13a7af701 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5/local/get_rnnlm_wordlist.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# 2018 Saikiran Valluri, GoVivace inc. + +import os, sys + +if len(sys.argv) < 5: + print( "Usage: python get_rnnlm_wordlist.py ") + sys.exit() + +lexicon_words = open(sys.argv[1], 'r', encoding="utf-8") +pocolm_words = open(sys.argv[2], 'r', encoding="utf-8") +rnnlm_wordsout = open(sys.argv[3], 'w', encoding="utf-8") +oov_wordlist = open(sys.argv[4], 'w', encoding="utf-8") + +line_count=0 +lexicon=[] + +for line in lexicon_words: + lexicon.append(line.split()[0]) + rnnlm_wordsout.write(line.split()[0] + " " + str(line_count)+'\n') + line_count = line_count + 1 + +for line in pocolm_words: + if not line.split()[0] in lexicon: + oov_wordlist.write(line.split()[0]+'\n') + rnnlm_wordsout.write(line.split()[0] + " " + str(line_count)+'\n') + line_count = line_count + 1 + +lexicon_words.close() +pocolm_words.close() +rnnlm_wordsout.close() +oov_wordlist.close() diff --git a/egs/fisher_callhome_spanish/s5/local/get_unigram_weights_vocab.py b/egs/fisher_callhome_spanish/s5/local/get_unigram_weights_vocab.py new file mode 100644 index 00000000000..3ecd16772d7 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5/local/get_unigram_weights_vocab.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# 2018 Saikiran Valluri, GoVivace inc. + +import os, sys + +if len(sys.argv) < 3: + print("Usage : python . ") + print(" Used for generating the unigram weights for second pass vocabulary from the first pass pocolm training metaparameters.") + sys.exit() + +pocolmdir=sys.argv[1] +unigramwts=open(sys.argv[2], 'w') + +names = open(pocolmdir+"/names", 'r') +metaparams = open(pocolmdir+"/metaparameters", 'r') + +name_mapper={} +for line in names: + fields=line.split() + name_mapper[fields[0]] = fields[1] + +lns = metaparams.readlines() +for lineno in range(len(name_mapper.keys())): + line = lns[lineno] + fileid = line.split()[0].split("_")[-1] + weight = line.split()[1] + unigramwts.write(name_mapper[fileid] + " " + weight + "\n") + +names.close() +unigramwts.close() +metaparams.close() diff --git a/egs/fisher_callhome_spanish/s5/local/merge_lexicons.py b/egs/fisher_callhome_spanish/s5/local/merge_lexicons.py index b42eb52d20a..94546dc44c3 100755 --- a/egs/fisher_callhome_spanish/s5/local/merge_lexicons.py +++ b/egs/fisher_callhome_spanish/s5/local/merge_lexicons.py @@ -1,11 +1,12 @@ -# Copyright 2014 Gaurav Kumar. 
Apache 2.0 -# 2018 Saikiran Valluri, GoVivace inc., Avaaya #!/usr/bin/env python # -*- coding: utf-8 -*- # +# 2018 Saikiran Valluri, GoVivace inc., Avaaya + # Merges unique words from Spanish Fisher, Gigaword and the LDC spanish lexicon from __future__ import print_function -import sys, re +import sys +import re import json import codecs import operator diff --git a/egs/fisher_callhome_spanish/s5/local/pocolm_cust.sh b/egs/fisher_callhome_spanish/s5/local/pocolm_cust.sh new file mode 100755 index 00000000000..0e71be29119 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5/local/pocolm_cust.sh @@ -0,0 +1,120 @@ +#!/usr/bin/env bash + +# this script generates Pocolm-estimated language models with various +# data sources in data/text folder and places the output in data/lm. + +set -euo pipefail + +. ./path.sh + +export POCOLM_ROOT=$(cd $KALDI_ROOT/tools/pocolm/; pwd -P) +export PATH=$PATH:$POCOLM_ROOT/scripts + + +wordlist=None +num_word=100000 +pocolm_stage=1 +ngram_order=3 +lm_dir= +arpa_dir= +textdir= +max_memory='--max-memory=8G' + +. ./cmd.sh +. ./utils/parse_options.sh + + +# If you do not want to set memory limitation for "sort", you can use +#max_memory= +# Choices for the max-memory can be: +# 1) integer + 'K', 'M', 'G', ... +# 2) integer + 'b', meaning unit is byte and no multiplication +# 3) integer + '%', meaning a percentage of memory +# 4) integer, default unit is 'K' + +fold_dev_opt= +# If you want to fold the dev-set in to the 'swbd1' set to produce the final +# model, un-comment the following line. For use in the Kaldi example script for +# ASR, this isn't suitable because the 'dev' set is the first 10k lines of the +# switchboard data, which we also use as dev data for speech recognition +# purposes. +#fold_dev_opt="--fold-dev-into=swbd1" + +bypass_metaparam_optim_opt= +# If you want to bypass the metaparameter optimization steps with specific metaparameters +# un-comment the following line, and change the numbers to some appropriate values. +# You can find the values from output log of train_lm.py. +# These example numbers of metaparameters is for 3-gram model running with train_lm.py. +# the dev perplexity should be close to the non-bypassed model. +#bypass_metaparam_optim_opt="--bypass-metaparameter-optimization=0.091,0.867,0.753,0.275,0.100,0.018,0.902,0.371,0.183,0.070" +# Note: to use these example parameters, you may need to remove the .done files +# to make sure the make_lm_dir.py be called and tain only 3-gram model +#for order in 3; do +#rm -f ${lm_dir}/${num_word}_${order}.pocolm/.done + +limit_unk_history_opt= +# If you want to limit the left of in the history of a n-gram +# un-comment the following line +#limit_unk_history_opt="--limit-unk-history=true" + +for order in ${ngram_order}; do + # decide on the vocabulary. + # Note: you'd use --wordlist if you had a previously determined word-list + # that you wanted to use. 
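+  # A minimal sketch of that --wordlist alternative (hypothetical: the path
+  # below is made up, and it assumes pocolm's train_lm.py takes --wordlist in
+  # place of --num-words, as the note above suggests):
+  #
+  #   train_lm.py --wordlist=data/local/fixed_wordlist.txt --num-splits=5 \
+  #     --warm-start-ratio=10 ${max_memory} ${textdir} ${order} \
+  #     ${lm_dir}/work ${lm_dir}/fixed_wordlist_${order}.pocolm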
+ lm_name="${num_word}_${order}" + min_counts='' + # Note: the following might be a more reasonable setting: + # min_counts='fisher=2 swbd1=1' + if [ -n "${min_counts}" ]; then + lm_name+="_`echo ${min_counts} | tr -s "[:blank:]" "_" | tr "=" "-"`" + fi + unpruned_lm_dir=${lm_dir}/${lm_name}.pocolm + train_lm.py --num-words=${num_word} --num-splits=5 --warm-start-ratio=10 ${max_memory} \ + --min-counts=${min_counts} \ + --keep-int-data=true ${fold_dev_opt} ${bypass_metaparam_optim_opt} \ + ${limit_unk_history_opt} ${textdir} ${order} ${lm_dir}/work ${unpruned_lm_dir} + + if [ $pocolm_stage -eq 2 ];then + mkdir -p ${arpa_dir} + format_arpa_lm.py ${max_memory} ${unpruned_lm_dir} | gzip -c > ${arpa_dir}/${lm_name}_${order}gram_unpruned.arpa.gz + + # example of pruning. note: the threshold can be less than or more than one. + get_data_prob.py ${max_memory} ${textdir}/dev.txt ${unpruned_lm_dir} 2>&1 | grep -F '[perplexity' + for threshold in 1.0 2.0 4.0; do + pruned_lm_dir=${lm_dir}/${lm_name}_prune${threshold}.pocolm + prune_lm_dir.py --final-threshold=${threshold} ${max_memory} ${unpruned_lm_dir} ${pruned_lm_dir} 2>&1 | tail -n 5 | head -n 3 + get_data_prob.py ${max_memory} ${textdir}/dev.txt ${pruned_lm_dir} 2>&1 | grep -F '[perplexity' + + format_arpa_lm.py ${max_memory} ${pruned_lm_dir} | gzip -c > ${arpa_dir}/${lm_name}_${order}gram_prune${threshold}.arpa.gz + + done + + # example of pruning by size. + size=1000000 + pruned_lm_dir=${lm_dir}/${lm_name}_prune${size}.pocolm + prune_lm_dir.py --target-num-ngrams=${size} ${max_memory} ${unpruned_lm_dir} ${pruned_lm_dir} 2>&1 | tail -n 8 | head -n 6 | grep -v 'log-prob changes' + get_data_prob.py ${textdir}/dev.txt ${max_memory} ${pruned_lm_dir} 2>&1 | grep -F '[perplexity' + + format_arpa_lm.py ${max_memory} ${pruned_lm_dir} | gzip -c > ${arpa_dir}/${lm_name}_${order}gram_prune${size}.arpa.gz + fi +done + +# (run local/srilm_baseline.sh ${num_word} to see the following result e.g. local/srilm_baseline.sh 40000 ) + +# the following does does some self-testing, including +# that the computed derivatives are accurate. +# local/self_test.sh + +# perplexities from pocolm-estimated language models with pocolm's interpolation +# method from orders 3, 4, and 5 are: +# order 3: optimize_metaparameters.py: final perplexity without barrier function was -4.358818 (perplexity: 78.164689) +# order 4: optimize_metaparameters.py: final perplexity without barrier function was -4.309507 (perplexity: 74.403797) +# order 5: optimize_metaparameters.py: final perplexity without barrier function was -4.301741 (perplexity: 73.828181) + +# note, the perplexities from pocolm-estimated language models with SRILM's +# interpolation from orders 3 and 4 are (from local/pocolm_with_srilm_combination.sh), +# 78.8449 and 75.2202 respectively. + +# note, the perplexities from SRILM-estimated language models with SRILM's +# interpolation tool from orders 3 and 4 are (from local/srilm_baseline.sh), +# 78.9056 and 75.5528 respectively. diff --git a/egs/fisher_callhome_spanish/s5/local/rnnlm.sh b/egs/fisher_callhome_spanish/s5/local/rnnlm.sh new file mode 100755 index 00000000000..3850910f312 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5/local/rnnlm.sh @@ -0,0 +1,83 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (author: Daniel Povey) +# 2015 Guoguo Chen +# 2017 Hainan Xu +# 2017 Xiaohui Zhang + +# This script trains LMs on the swbd LM-training data. + +# rnnlm/train_rnnlm.sh: best iteration (out of 35) was 34, linking it to final iteration. 
+# rnnlm/train_rnnlm.sh: train/dev perplexity was 41.9 / 50.0. +# Train objf: -5.07 -4.43 -4.25 -4.17 -4.12 -4.07 -4.04 -4.01 -3.99 -3.98 -3.96 -3.94 -3.92 -3.90 -3.88 -3.87 -3.86 -3.85 -3.84 -3.83 -3.82 -3.81 -3.80 -3.79 -3.78 -3.78 -3.77 -3.77 -3.76 -3.75 -3.74 -3.73 -3.73 -3.72 -3.71 +# Dev objf: -10.32 -4.68 -4.43 -4.31 -4.24 -4.19 -4.15 -4.13 -4.10 -4.09 -4.05 -4.03 -4.02 -4.00 -3.99 -3.98 -3.98 -3.97 -3.96 -3.96 -3.95 -3.94 -3.94 -3.94 -3.93 -3.93 -3.93 -3.92 -3.92 -3.92 -3.92 -3.91 -3.91 -3.91 -3.91 + + +dir=Spanish_gigawrd/rnnlm +pocolm_dir=Spanish_gigawrd/work_pocolm/lm/110000_3.pocolm_pruned +wordslist= +embedding_dim=1024 +lstm_rpd=256 +lstm_nrpd=256 +stage=0 +train_stage=-30 +text_dir=Spanish_gigawrd/text_lm + +. ./cmd.sh +. ./utils/parse_options.sh + +mkdir -p $dir/config +set -e + +for f in $text_dir/dev.txt; do + [ ! -f $f ] && \ + echo "$0: expected file $f to exist;" && exit 1 +done + +if [ $stage -le 0 ]; then + if [ -f $text_dir/unigram_weights ] ; then + mv $text_dir/unigram_weights $pocolm_dir/ + fi + cp $wordslist $dir/config/words.txt + n=`cat $dir/config/words.txt | wc -l` + echo " $n" >> $dir/config/words.txt + + # words that are not present in words.txt but are in the training or dev data, will be + # mapped to during training. + echo "" >$dir/config/oov.txt + local/get_data_weights.pl $pocolm_dir $dir/config/data_weights.txt + rnnlm/get_unigram_probs.py --vocab-file=$dir/config/words.txt \ + --unk-word="" \ + --data-weights-file=$dir/config/data_weights.txt \ + $text_dir | awk 'NF==2' >$dir/config/unigram_probs.txt + + # choose features + rnnlm/choose_features.py --unigram-probs=$dir/config/unigram_probs.txt \ + --use-constant-feature=true \ + --special-words=',,,,[noise],[laughter]' \ + $dir/config/words.txt > $dir/config/features.txt +fi + +if [ $stage -le 1 ]; then + cat <$dir/config/xconfig + input dim=$embedding_dim name=input + relu-renorm-layer name=tdnn1 dim=$embedding_dim input=Append(0, IfDefined(-1)) + fast-lstmp-layer name=lstm1 cell-dim=$embedding_dim recurrent-projection-dim=$lstm_rpd non-recurrent-projection-dim=$lstm_nrpd + relu-renorm-layer name=tdnn2 dim=$embedding_dim input=Append(0, IfDefined(-3)) + fast-lstmp-layer name=lstm2 cell-dim=$embedding_dim recurrent-projection-dim=$lstm_rpd non-recurrent-projection-dim=$lstm_nrpd + relu-renorm-layer name=tdnn3 dim=$embedding_dim input=Append(0, IfDefined(-3)) + output-layer name=output include-log-softmax=false dim=$embedding_dim +EOF + rnnlm/validate_config_dir.sh $text_dir $dir/config +fi + +if [ $stage -le 2 ]; then + rnnlm/prepare_rnnlm_dir.sh $text_dir $dir/config $dir +fi + +if [ $stage -le 3 ]; then + rnnlm/train_rnnlm.sh --num-jobs-initial 1 --num-jobs-final 2 \ + --stage $train_stage --num-epochs 5 --cmd "$train_cmd" $dir +fi + +exit 0 diff --git a/egs/fisher_callhome_spanish/s5/local/rnnlm/train_rnnlm.sh b/egs/fisher_callhome_spanish/s5/local/rnnlm/train_rnnlm.sh deleted file mode 100755 index 3713fe228d6..00000000000 --- a/egs/fisher_callhome_spanish/s5/local/rnnlm/train_rnnlm.sh +++ /dev/null @@ -1,101 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (author: Daniel Povey) Tony Robinson -# 2017 Hainan Xu -# 2017 Ke Li - -# This script is similar to rnnlm_lstm_tdnn_a.sh except for adding L2 regularization. - -# local/rnnlm/train_rnnlm.sh: best iteration (out of 18) was 17, linking it to final iteration. -# local/rnnlm/train_rnnlm.sh: train/dev perplexity was 45.6 / 68.7. 
-# Train objf: -651.50 -4.44 -4.26 -4.15 -4.08 -4.03 -4.00 -3.97 -3.94 -3.92 -3.90 -3.89 -3.88 -3.86 -3.85 -3.84 -3.83 -3.82 -# Dev objf: -10.76 -4.68 -4.47 -4.38 -4.33 -4.29 -4.28 -4.27 -4.26 -4.26 -4.25 -4.24 -4.24 -4.24 -4.23 -4.23 -4.23 -4.23 - -# Begin configuration section. -dir=exp/rnnlm_lstm_tdnn_1b -embedding_dim=200 -embedding_l2=0.005 # embedding layer l2 regularize -comp_l2=0.005 # component-level l2 regularize -output_l2=0.005 # output-layer l2 regularize -epochs=90 -mic= -stage=-10 -train_stage=0 - -. ./cmd.sh -. ./utils/parse_options.sh -[ -z "$cmd" ] && cmd=$train_cmd - -train=data/train/text -dev=data/dev2/text # We at no stage in run.sh should decode dev2 partition for results! -wordlist=data/lang/words.txt -text_dir=data/local/rnnlm/text -mkdir -p $dir/config -set -e - -for f in $train $dev $wordlist; do - [ ! -f $f ] && \ - echo "$0: expected file $f to exist; search for run.sh and utils/prepare_lang.sh in run.sh" && exit 1 -done - -if [ $stage -le 0 ]; then - mkdir -p $text_dir - cat $train | cut -d ' ' -f2- > $text_dir/ami.txt - cat $dev | cut -d ' ' -f2- > $text_dir/dev.txt -fi - -if [ $stage -le 1 ]; then - cp $wordlist $dir/config/ - n=`cat $dir/config/words.txt | wc -l` - echo " $n" >> $dir/config/words.txt - - # words that are not present in words.txt but are in the training or dev data, will be - # mapped to during training. - echo "" >$dir/config/oov.txt - - cat > $dir/config/data_weights.txt <$dir/config/unigram_probs.txt - - # choose features - rnnlm/choose_features.py --unigram-probs=$dir/config/unigram_probs.txt \ - --use-constant-feature=true \ - --top-word-features 10000 \ - --min-frequency 1.0e-03 \ - --special-words=',,,,[noise],[laughter]' \ - $dir/config/words.txt > $dir/config/features.txt - -lstm_opts="l2-regularize=$comp_l2" -tdnn_opts="l2-regularize=$comp_l2" -output_opts="l2-regularize=$output_l2" - - cat >$dir/config/xconfig < $dir/normalize/$job/substitute.sh + +bash $dir/normalize/$job/substitute.sh | \ + sed "s: 's:'s:g" | sed "s: 'm:'m:g" | \ + sed "s: \s*: :g" | tr 'A-ZÂÁÀÄÊÉÈËÏÍÎÖÓÔÖÚÙÛÑÇ' 'a-zâáàäêéèëïíîöóôöúùûñç' > $dir/normalize/$job/text +normalizer_main --config=$config --path_prefix=$path_prefix <$dir/normalize/$job/text >$dir/$job.txt + +exit 0; diff --git a/egs/fisher_callhome_spanish/s5/local/train_pocolm.sh b/egs/fisher_callhome_spanish/s5/local/train_pocolm.sh new file mode 100755 index 00000000000..b8b3ca35ef9 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5/local/train_pocolm.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +stage=-2 +num_words_pocolm=110000 +prune_size=1000000 + +. ./path.sh +. ./cmd.sh +. ./utils/parse_options.sh + +set -euo pipefail + +export POCOLM_ROOT=$(cd $KALDI_ROOT/tools/pocolm/; pwd -P) +export PATH=$PATH:$POCOLM_ROOT/scripts + +textdir=$1 +pocolm_dir=$2 + + +if [ $stage -le -2 ]; then + echo "****" + echo " POCOLM experiment : Running STAGE 1 : 2-gram Pocolm general closed vocabulary model" + echo " Will estimate the metaparams to be used as unigram weights for stage 2 ....." + echo "****" + if [ -e "$textdir"/unigram_weights ]; then + rm "$textdir"/unigram_weights + fi + if [ -e "$pocolm_dir" ]; then + rm -r "$pocolm_dir" + fi + + bash local/pocolm_cust.sh --num-word 0 --ngram-order 2 --pocolm-stage 1 --lm-dir "$pocolm_dir"/lm \ + --arpa-dir "$pocolm_dir"/arpa --textdir "$textdir" + +fi + +if [ $stage -le -1 ];then + echo "********" + echo "POCOLM experiment : RUNNING STAGE 2 : 3gram POCOLM using unigram wts estimates in 1st stage....." 
+ echo "********" + + echo " " > "$pocolm_dir"/lm/work/.unigram_weights.done + python local/get_unigram_weights_vocab.py "$pocolm_dir"/lm/0_2.pocolm/ "$textdir"/unigram_weights + bash local/pocolm_cust.sh --num-word "$num_words_pocolm" --lm-dir "$pocolm_dir"/lm \ + --arpa-dir "$pocolm_dir"/arpa --textdir "$textdir" + prune_lm_dir.py --target-num-ngrams=$prune_size "$pocolm_dir"/lm/"$num_words_pocolm"_3.pocolm \ + "$pocolm_dir"/lm/"$num_words_pocolm"_3.pocolm_pruned_"$prune_size" + mkdir -p "$pocolm_dir"/arpa + format_arpa_lm.py "$pocolm_dir"/lm/"$num_words_pocolm"_3.pocolm_pruned_"$prune_size" | \ + gzip -c > "$pocolm_dir"/arpa/"$num_words_pocolm"_3_pruned_"$prune_size".arpa.gz +fi + + +exit 0; diff --git a/egs/fisher_callhome_spanish/s5/path.sh b/egs/fisher_callhome_spanish/s5/path.sh index 17ffb0369f8..2993311fd90 100755 --- a/egs/fisher_callhome_spanish/s5/path.sh +++ b/egs/fisher_callhome_spanish/s5/path.sh @@ -1,6 +1,13 @@ -export KALDI_ROOT=`pwd`/../../.. +export KALDI_ROOT=`pwd`/../../../ +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh +export LD_LIBRARY_PATH=/home/dpovey/libs + +export SPARROWHAWK_ROOT=$KALDI_ROOT/tools/sparrowhawk +export PATH=$SPARROWHAWK_ROOT/bin:$PATH export LC_ALL=C -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/dpovey/libs +export LANG=C + +source ~/anaconda/bin/activate py36 diff --git a/egs/fisher_callhome_spanish/s5/run.sh b/egs/fisher_callhome_spanish/s5/run.sh index 6e2752a7b68..95425c29034 100755 --- a/egs/fisher_callhome_spanish/s5/run.sh +++ b/egs/fisher_callhome_spanish/s5/run.sh @@ -4,14 +4,22 @@ # Copyright 2014 Gaurav Kumar. Apache 2.0 # Recipe for Fisher/Callhome-Spanish -stage=0 -train_stage=-20 +stage=-1 +lmstage=-2 +train_rnnlm=false +start_textcleanup=false # WARNING : IT starts from flattening gigaword corpus to preparing text folder. + # If you already have the normalised gigword text somewhere, you can bypass the + # time consuming text cleanup (~1 week) by setting this option false. +addtraintext=true # If true, this option appends the Fisher train text to the Gigaword corpus textfile, to + # perform the A, A + G, Dev type POCOLM training configuration. + # A=fsp train, G=gigword text, +num_words_pocolm=110000 train_sgmm2=false # call the next line with the directory where the Spanish Fisher data is # (the values below are just an example). sfisher_speech=/export/corpora/LDC/LDC2010S01 -sfisher_transcripts=/export/corpora/LDC/LDC2010T04 +sfisher_transcripts=/export/c03/svalluri//LDC2010T04 spanish_lexicon=/export/corpora/LDC/LDC96L16 split=local/splits/split_fisher @@ -19,15 +27,17 @@ callhome_speech=/export/corpora/LDC/LDC96S35 callhome_transcripts=/export/corpora/LDC/LDC96T17 split_callhome=local/splits/split_callhome +gigaword_datapath=/export/c03/svalluri/Spanish_gigaword/data +rnnlm_workdir=workdir_rnnlm_Spanish_08032019 mfccdir=`pwd`/mfcc . ./cmd.sh if [ -f path.sh ]; then . ./path.sh; fi . parse_options.sh || exit 1; -set -e +set -eou pipefail -if [ $stage -le 1 ]; then +if [ $stage -le -1 ]; then local/fsp_data_prep.sh $sfisher_speech $sfisher_transcripts local/callhome_data_prep.sh $callhome_speech $callhome_transcripts @@ -37,19 +47,14 @@ if [ $stage -le 1 ]; then # ES gigaword corpus to bring the total to 64k words. 
The ES frequency sorted # wordlist is downloaded if it is not available. local/fsp_prepare_dict.sh $spanish_lexicon + # Let's keep the original dict copy for G2P training + cp -r data/local/dict data/local/dict_orig + ( + steps/dict/train_g2p_seq2seq.sh data/local/dict_orig/lexicon.txt exp/g2p || touch exp/g2p/.error + ) & # Added c,j, v to the non silences phones manually - utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang - - # Make sure that you do not use your test and your dev sets to train the LM - # Some form of cross validation is possible where you decode your dev/set based on an - # LM that is trained on everything but that that conversation - # When in doubt about what your data partitions should be use local/fsp_ideal_data_partitions.pl - # to get the numbers. Depending on your needs, you might have to change the size of - # the splits within that file. The default paritions are based on the Kaldi + Joshua - # requirements which means that I have very large dev and test sets - local/fsp_train_lms.sh $split - local/fsp_create_test_lang.sh + utils/prepare_lang.sh data/local/dict_orig "" data/local/lang_orig data/lang_orig utils/fix_data_dir.sh data/local/data/train_all @@ -70,34 +75,65 @@ if [ $stage -le 1 ]; then cp -r data/local/data/callhome_train_all data/callhome_train_all - # Creating data partitions for the pipeline - # We need datasets for both the ASR and SMT system - # We have 257455 utterances left, so the partitions are roughly as follows - # ASR Train : 100k utterances - # ASR Tune : 17455 utterances - # ASR Eval : 20k utterances - # MT Train : 100k utterances - # MT Tune : Same as the ASR eval set (Use the lattices from here) - # MT Eval : 20k utterances - # The dev and the test sets need to be carefully chosen so that there is no conversation/speaker - # overlap. This has been setup and the script local/fsp_ideal_data_partitions provides the numbers that are needed below. - # As noted above, the LM has not been trained on the dev and the test sets. - #utils/subset_data_dir.sh --first data/train_all 158126 data/dev_and_test - #utils/subset_data_dir.sh --first data/dev_and_test 37814 data/asr_dev_and_test - #utils/subset_data_dir.sh --last data/dev_and_test 120312 data/mt_train_and_test - #utils/subset_data_dir.sh --first data/asr_dev_and_test 17662 data/dev - #utils/subset_data_dir.sh --last data/asr_dev_and_test 20152 data/test - #utils/subset_data_dir.sh --first data/mt_train_and_test 100238 data/mt_train - #utils/subset_data_dir.sh --last data/mt_train_and_test 20074 data/mt_test - #rm -r data/dev_and_test - #rm -r data/asr_dev_and_test - #rm -r data/mt_train_and_test - local/create_splits.sh $split local/callhome_create_splits.sh $split_callhome + fi +if $start_textcleanup; then + echo "WARNING : Starting from cleaning up and normalizing the Gigword text" + echo " This might take few days........... You can opt out this stage " + echo " by setting start_textcleanup=false, and having text_lm ready inside rnnlm_workdir." 
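+  # For reference, a rough sketch of the layout expected when the cleanup is
+  # bypassed (file names are taken from the stage 0 block below; the parent
+  # directory is whatever $rnnlm_workdir is set to):
+  #   $rnnlm_workdir/text_lm/train.txt                        # Fisher train text, utterance ids stripped
+  #   $rnnlm_workdir/text_lm/dev.txt                          # dev2 text, used as the LM dev set
+  #   $rnnlm_workdir/text_lm/spanish_gigaword_normalised.txt  # normalized Gigaword (train text appended if addtraintext=true)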
+ + if [ $stage -le 0 ]; then + mkdir -p "$rnnlm_workdir"/gigaword_rawtext + local/flatten_gigaword/flatten_all_gigaword.sh "$gigaword_datapath" "$rnnlm_workdir"/flattened_gigaword_corpus 24 + cat "$rnnlm_workdir"/flattened_gigaword_corpus/*.flat > "$rnnlm_workdir"/gigaword_rawtext/in.txt + local/clean_txt_dir.sh "$rnnlm_workdir"/gigaword_rawtext/ \ + "$rnnlm_workdir"/normalised_gigaword_corpus/ + mkdir -p "$rnnlm_workdir"/text_lm + cut -d " " -f 2- data/train/text > "$rnnlm_workdir"/text_lm/train.txt + cut -d " " -f 2- data/dev2/text > "$rnnlm_workdir"/text_lm/dev.txt # For RNNLM and POCOLM training we use dev2/text as dev file. + cp "$rnnlm_workdir"/normalised_gigaword_corpus/text_normalized "$rnnlm_workdir"/text_lm/spanish_gigaword_normalised.txt + if $addtraintext; then + cat "$rnnlm_workdir"/text_lm/train.txt >> "$rnnlm_workdir"/text_lm/spanish_gigaword_normalised.txt + fi + fi +fi + +if [ $stage -le 1 ]; then + local/train_pocolm.sh --stage $lmstage --num-words-pocolm $num_words_pocolm "$rnnlm_workdir"/text_lm/ "$rnnlm_workdir"/pocolm + local/get_rnnlm_wordlist.py data/lang_orig/words.txt "$rnnlm_workdir"/pocolm/lm/"$num_words_pocolm"_3.pocolm/words.txt \ + "$rnnlm_workdir"/rnnlm_wordlist "$rnnlm_workdir"/oov_pocolmwords + if $train_rnnlm; then + local/rnnlm.sh --stage $lmstage --dir "$rnnlm_workdir"/rnnlm --pocolm-dir "$rnnlm_workdir"/pocolm/lm/"$num_words_pocolm"_3.pocolm \ + --wordslist "$rnnlm_workdir"/rnnlm_wordlist --text-dir "$rnnlm_workdir"/text_lm + fi +fi + + if [ $stage -le 2 ]; then + wait # wait till G2P training finishes + if [ -f exp/g2p/.error ]; then + rm exp/g2p/.error || true + echo "Fail to train the G2P model." && exit 1; + fi + steps/dict/apply_g2p_seq2seq.sh "$rnnlm_workdir"/oov_pocolmwords exp/g2p "$rnnlm_workdir"/oov_g2p.lex + cat "$rnnlm_workdir"/oov_g2p.lex/lexicon.lex data/local/dict/lexicon.txt | sed "/^$/d" |sort | uniq > "$rnnlm_workdir"/lexicon_extended.txt + cp "$rnnlm_workdir"/lexicon_extended.txt data/local/dict/lexicon.txt # Replacing original lexicon with extended version. + + utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang + + # Make sure that you do not use your test and your dev sets to train the LM + # Some form of cross validation is possible where you decode your dev/set based on an + # LM that is trained on everything but that that conversation + # When in doubt about what your data partitions should be use local/fsp_ideal_data_partitions.pl + # to get the numbers. Depending on your needs, you might have to change the size of + # the splits within that file. 
The default paritions are based on the Kaldi + Joshua + # requirements which means that I have very large dev and test sets + local/fsp_train_lms.sh $split + local/fsp_create_test_lang.sh + # Now compute CMVN stats for the train, dev and test subsets steps/compute_cmvn_stats.sh data/dev exp/make_mfcc/dev $mfccdir steps/compute_cmvn_stats.sh data/test exp/make_mfcc/test $mfccdir @@ -264,8 +300,11 @@ for iter in 1 2 3 4; do data/lang_test data/dev/ exp/sgmm5/decode_dev $decode done ) & - fi -local/chain/run_tdnn_1g.sh --stage $stage --train-stage $train_stage || exit 1; +wait; + +if [ $stage -le 6 ]; then + local/chain/run_tdnn_1g.sh --stage 0 --gigaword-workdir $rnnlm_workdir || exit 1; +fi exit 0; diff --git a/egs/fisher_callhome_spanish/s5/steps b/egs/fisher_callhome_spanish/s5/steps index 6e99bf5b5ad..1b186770dd1 120000 --- a/egs/fisher_callhome_spanish/s5/steps +++ b/egs/fisher_callhome_spanish/s5/steps @@ -1 +1 @@ -../../wsj/s5/steps \ No newline at end of file +../../wsj/s5/steps/ \ No newline at end of file diff --git a/egs/fisher_callhome_spanish/s5/utils b/egs/fisher_callhome_spanish/s5/utils index b240885218f..a3279dc8679 120000 --- a/egs/fisher_callhome_spanish/s5/utils +++ b/egs/fisher_callhome_spanish/s5/utils @@ -1 +1 @@ -../../wsj/s5/utils \ No newline at end of file +../../wsj/s5/utils/ \ No newline at end of file From be019cd1fd664de4cf6ceed08210c49174db4aa4 Mon Sep 17 00:00:00 2001 From: armusc <46787089+armusc@users.noreply.github.com> Date: Tue, 2 Apr 2019 03:34:56 +0200 Subject: [PATCH 120/235] [scripts] Fix bug in extend_lang.sh regarding extra_disambig.txt (#3195) --- egs/wsj/s5/utils/lang/extend_lang.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/wsj/s5/utils/lang/extend_lang.sh b/egs/wsj/s5/utils/lang/extend_lang.sh index c13d5d3e78b..c8f680a12fb 100755 --- a/egs/wsj/s5/utils/lang/extend_lang.sh +++ b/egs/wsj/s5/utils/lang/extend_lang.sh @@ -131,7 +131,7 @@ for n in $(seq 0 $ndisambig); do sym='#'$n; if ! 
grep -w -q "$sym" $dir/phones/disambig.txt; then echo "$sym"; fi done > $tmpdir/extra_disambig.txt highest_number=$(tail -n 1 $srcdir/phones.txt | awk '{print $2}') -awk -v start=$highest_number '{print $1, NR+start}' <$tmpdir/extra_disambig.txt >>$dir/words.txt +awk -v start=$highest_number '{print $1, NR+start}' <$tmpdir/extra_disambig.txt >>$dir/phones.txt echo "$0: added $(wc -l <$tmpdir/extra_disambig.txt) extra disambiguation symbols to phones.txt" From f61047074ffc0cf35afbe3535c29d5e19a4c3c9a Mon Sep 17 00:00:00 2001 From: saikiranvalluri Date: Tue, 2 Apr 2019 05:44:59 -0400 Subject: [PATCH 121/235] removed s5_gigaword folder --- .../s5_gigaword/cmd.sh | 15 - .../s5_gigaword/conf/decode.config | 6 - .../s5_gigaword/conf/mfcc.conf | 2 - .../s5_gigaword/conf/mfcc_hires.conf | 10 - .../s5_gigaword/conf/online_cmvn.conf | 1 - .../s5_gigaword/conf/plp.conf | 2 - .../local/callhome_create_splits.sh | 31 - .../s5_gigaword/local/callhome_data_prep.sh | 163 ---- .../s5_gigaword/local/callhome_get_1_best.py | 75 -- .../local/callhome_get_lattices.py | 115 --- .../local/callhome_make_spk2gender.sh | 29 - .../s5_gigaword/local/callhome_make_trans.pl | 74 -- .../s5_gigaword/local/callhome_text_pp.sh | 9 - .../s5_gigaword/local/chain/run_tdnn_1g.sh | 294 ------- .../s5_gigaword/local/clean_abbrevs_text.py | 35 - .../s5_gigaword/local/clean_txt_dir.sh | 57 -- .../s5_gigaword/local/create_oracle_ctm.sh | 30 - .../s5_gigaword/local/create_splits.sh | 30 - .../s5_gigaword/local/ctm.sh | 34 - .../s5_gigaword/local/decode_report.py | 148 ---- .../s5_gigaword/local/find_unique_phones.pl | 25 - .../s5_gigaword/local/fix_stm.sh | 10 - .../flatten_gigaword/flatten_all_gigaword.sh | 15 - .../flatten_gigaword/flatten_one_gigaword.py | 61 -- .../local/flatten_gigaword/run_flat.sh | 17 - .../s5_gigaword/local/fsp_create_test_lang.sh | 49 -- .../s5_gigaword/local/fsp_data_prep.sh | 176 ---- .../local/fsp_ideal_data_partitions.pl | 85 -- .../s5_gigaword/local/fsp_make_spk2gender.sh | 29 - .../s5_gigaword/local/fsp_make_trans.pl | 81 -- .../s5_gigaword/local/fsp_prepare_dict.sh | 142 ---- .../s5_gigaword/local/fsp_train_lms.sh | 140 ---- .../s5_gigaword/local/get_1_best.py | 62 -- .../s5_gigaword/local/get_data_weights.pl | 39 - .../s5_gigaword/local/get_lattices.py | 115 --- .../s5_gigaword/local/get_oracle.sh | 32 - .../s5_gigaword/local/get_rnnlm_wordlist.py | 34 - .../local/get_unigram_weights_vocab.py | 33 - .../s5_gigaword/local/isolate_phones.pl | 66 -- .../s5_gigaword/local/latconvert.sh | 124 --- .../s5_gigaword/local/merge_lexicons.py | 65 -- .../s5_gigaword/local/monitor_denlats.sh | 31 - .../local/nnet3/run_ivector_common.sh | 187 ----- .../s5_gigaword/local/pocolm_cust.sh | 120 --- .../s5_gigaword/local/process_oracle.py | 64 -- .../s5_gigaword/local/rescore.sh | 24 - .../s5_gigaword/local/rnnlm.sh | 83 -- .../s5_gigaword/local/run_norm.sh | 36 - .../s5_gigaword/local/run_sgmm2x.sh | 57 -- .../s5_gigaword/local/score.sh | 1 - .../s5_gigaword/local/score_oracle.sh | 29 - .../s5_gigaword/local/splits/dev | 20 - .../local/splits/split_callhome/dev | 20 - .../local/splits/split_callhome/test | 20 - .../local/splits/split_callhome/train | 80 -- .../s5_gigaword/local/splits/split_fisher/dev | 20 - .../local/splits/split_fisher/dev2 | 20 - .../local/splits/split_fisher/test | 20 - .../local/splits/split_fisher/train | 759 ------------------ .../s5_gigaword/local/splits/test | 20 - .../s5_gigaword/local/splits/train | 80 -- .../s5_gigaword/local/spron.pl | 304 ------- 
.../s5_gigaword/local/subset_data_prep.sh | 164 ---- .../s5_gigaword/local/train_get_1_best.py | 79 -- .../s5_gigaword/local/train_get_lattices.py | 125 --- .../s5_gigaword/local/train_pocolm.sh | 54 -- .../s5_gigaword/local/train_process_oracle.py | 79 -- .../s5_gigaword/local/wer_output_filter | 5 - .../s5_gigaword/path.sh | 13 - egs/fisher_callhome_spanish/s5_gigaword/rnnlm | 1 - .../s5_gigaword/run.sh | 310 ------- egs/fisher_callhome_spanish/s5_gigaword/steps | 1 - egs/fisher_callhome_spanish/s5_gigaword/utils | 1 - 73 files changed, 5387 deletions(-) delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/cmd.sh delete mode 100644 egs/fisher_callhome_spanish/s5_gigaword/conf/decode.config delete mode 100644 egs/fisher_callhome_spanish/s5_gigaword/conf/mfcc.conf delete mode 100644 egs/fisher_callhome_spanish/s5_gigaword/conf/mfcc_hires.conf delete mode 100644 egs/fisher_callhome_spanish/s5_gigaword/conf/online_cmvn.conf delete mode 100644 egs/fisher_callhome_spanish/s5_gigaword/conf/plp.conf delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/callhome_create_splits.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/callhome_data_prep.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/callhome_get_1_best.py delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/callhome_get_lattices.py delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/callhome_make_spk2gender.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/callhome_make_trans.pl delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/callhome_text_pp.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/chain/run_tdnn_1g.sh delete mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/clean_abbrevs_text.py delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/clean_txt_dir.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/create_oracle_ctm.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/create_splits.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/ctm.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/decode_report.py delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/find_unique_phones.pl delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/fix_stm.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/flatten_all_gigaword.sh delete mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/flatten_one_gigaword.py delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/run_flat.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/fsp_create_test_lang.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/fsp_data_prep.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/fsp_ideal_data_partitions.pl delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/fsp_make_spk2gender.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/fsp_make_trans.pl delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/fsp_prepare_dict.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/fsp_train_lms.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/get_1_best.py delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/get_data_weights.pl delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/get_lattices.py delete mode 100755 
egs/fisher_callhome_spanish/s5_gigaword/local/get_oracle.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/get_rnnlm_wordlist.py delete mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/get_unigram_weights_vocab.py delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/isolate_phones.pl delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/latconvert.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/merge_lexicons.py delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/monitor_denlats.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/nnet3/run_ivector_common.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/pocolm_cust.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/process_oracle.py delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/rescore.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/rnnlm.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/run_norm.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/run_sgmm2x.sh delete mode 120000 egs/fisher_callhome_spanish/s5_gigaword/local/score.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/score_oracle.sh delete mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/splits/dev delete mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/dev delete mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/test delete mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/train delete mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/dev delete mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/dev2 delete mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/test delete mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/train delete mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/splits/test delete mode 100644 egs/fisher_callhome_spanish/s5_gigaword/local/splits/train delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/spron.pl delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/subset_data_prep.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/train_get_1_best.py delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/train_get_lattices.py delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/train_process_oracle.py delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/local/wer_output_filter delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/path.sh delete mode 120000 egs/fisher_callhome_spanish/s5_gigaword/rnnlm delete mode 100755 egs/fisher_callhome_spanish/s5_gigaword/run.sh delete mode 120000 egs/fisher_callhome_spanish/s5_gigaword/steps delete mode 120000 egs/fisher_callhome_spanish/s5_gigaword/utils diff --git a/egs/fisher_callhome_spanish/s5_gigaword/cmd.sh b/egs/fisher_callhome_spanish/s5_gigaword/cmd.sh deleted file mode 100755 index db97f1fbc6f..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/cmd.sh +++ /dev/null @@ -1,15 +0,0 @@ -# you can change cmd.sh depending on what type of queue you are using. 
-# If you have no queueing system and want to run on a local machine, you -# can change all instances 'queue.pl' to run.pl (but be careful and run -# commands one by one: most recipes will exhaust the memory on your -# machine). queue.pl works with GridEngine (qsub). slurm.pl works -# with slurm. Different queues are configured differently, with different -# queue names and different ways of specifying things like memory; -# to account for these differences you can create and edit the file -# conf/queue.conf to match your queue's configuration. Search for -# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, -# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. - -export train_cmd="retry.pl queue.pl --mem 8G" -export decode_cmd="retry.pl queue.pl --mem 8G" -export mkgraph_cmd="queue.pl --mem 8G" diff --git a/egs/fisher_callhome_spanish/s5_gigaword/conf/decode.config b/egs/fisher_callhome_spanish/s5_gigaword/conf/decode.config deleted file mode 100644 index 7908f178373..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/conf/decode.config +++ /dev/null @@ -1,6 +0,0 @@ -# Use wider-than-normal decoding beams. -first_beam=16.0 -beam=20.0 -lat_beam=10.0 -min_lmwt=2 -max_lmwt=10 diff --git a/egs/fisher_callhome_spanish/s5_gigaword/conf/mfcc.conf b/egs/fisher_callhome_spanish/s5_gigaword/conf/mfcc.conf deleted file mode 100644 index ffb41a1aae4..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/conf/mfcc.conf +++ /dev/null @@ -1,2 +0,0 @@ ---use-energy=false # only non-default option. ---sample-frequency=8000 diff --git a/egs/fisher_callhome_spanish/s5_gigaword/conf/mfcc_hires.conf b/egs/fisher_callhome_spanish/s5_gigaword/conf/mfcc_hires.conf deleted file mode 100644 index d870ab04c38..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/conf/mfcc_hires.conf +++ /dev/null @@ -1,10 +0,0 @@ -# config for high-resolution MFCC features, intended for neural network training. -# Note: we keep all cepstra, so it has the same info as filterbank features, -# but MFCC is more easily compressible (because less correlated) which is why -# we prefer this method. ---use-energy=false # use average of log energy, not energy. ---sample-frequency=8000 # Switchboard is sampled at 8kHz ---num-mel-bins=40 # similar to Google's setup. ---num-ceps=40 # there is no dimensionality reduction. ---low-freq=40 # low cutoff frequency for mel bins ---high-freq=-200 # high cutoff frequently, relative to Nyquist of 4000 (=3800) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/conf/online_cmvn.conf b/egs/fisher_callhome_spanish/s5_gigaword/conf/online_cmvn.conf deleted file mode 100644 index 7748a4a4dd3..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/conf/online_cmvn.conf +++ /dev/null @@ -1 +0,0 @@ -# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh diff --git a/egs/fisher_callhome_spanish/s5_gigaword/conf/plp.conf b/egs/fisher_callhome_spanish/s5_gigaword/conf/plp.conf deleted file mode 100644 index c4b73674cab..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/conf/plp.conf +++ /dev/null @@ -1,2 +0,0 @@ -# No non-default options for now. 
- diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_create_splits.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_create_splits.sh deleted file mode 100755 index 07814da46a9..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_create_splits.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2014 Gaurav Kumar. Apache 2.0 - -data_dir=data -train_all=data/callhome_train_all - -if [ $# -lt 1 ]; then - echo "Specify the location of the split files" - exit 1; -fi - -splitFile=$1 - -# Train first -for split in train dev test -do - dirName=callhome_$split - - cp -r $train_all $data_dir/$dirName - - awk 'BEGIN {FS=" "}; FNR==NR { a[$1]; next } ((substr($2,0,length($2)-2) ".sph") in a)' \ - $splitFile/$split $train_all/segments > $data_dir/$dirName/segments - - n=`awk 'BEGIN {FS = " "}; {print substr($2,0,length($2)-2)}' $data_dir/$dirName/segments | sort | uniq | wc -l` - - echo "$n conversations left in split $dirName" - - utils/fix_data_dir.sh $data_dir/$dirName - utils/validate_data_dir.sh $data_dir/$dirName -done - diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_data_prep.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_data_prep.sh deleted file mode 100755 index f61b0fa9519..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_data_prep.sh +++ /dev/null @@ -1,163 +0,0 @@ -#!/bin/bash -# -# Copyright 2014 Gaurav Kumar. Apache 2.0 -# The input is the Callhome Spanish Dataset. (*.sph files) -# In addition the transcripts are needed as well. -# To be run from one directory above this script. - -# Note: when creating your own data preparation scripts, it's a good idea -# to make sure that the speaker id (if present) is a prefix of the utterance -# id, that the output scp file is sorted on utterance id, and that the -# transcription file is exactly the same length as the scp file and is also -# sorted on utterance id (missing transcriptions should be removed from the -# scp file using e.g. scripts/filter_scp.pl) - -stage=0 - -export LC_ALL=C - - -if [ $# -lt 2 ]; then - echo "Arguments should be the location of the Callhome Spanish Speech and Transcript Directories, se -e ../run.sh for example." - exit 1; -fi - -cdir=`pwd` -dir=`pwd`/data/local/data -local=`pwd`/local -utils=`pwd`/utils -tmpdir=`pwd`/data/local/tmp - -. ./path.sh || exit 1; # Needed for KALDI_ROOT -export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin -sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe -if [ ! -x $sph2pipe ]; then - echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; - exit 1; -fi -cd $dir - -# Make directory of links to the WSJ disks such as 11-13.1. This relies on the command -# line arguments being absolute pathnames. -#rm -r links/ 2>/dev/null -mkdir -p links/ -ln -s $* links - -# Basic spot checks to see if we got the data that we needed -if [ ! -d links/LDC96S35 -o ! -d links/LDC96T17 ]; -then - echo "The speech and the data directories need to be named LDC96S35 and LDC96T17 respecti -vely" - exit 1; -fi - -if [ ! -d links/LDC96S35/CALLHOME/SPANISH/SPEECH/DEVTEST -o ! -d links/LDC96S35/CALLHOME/SPANISH/SPEECH/EVLTEST -o ! -d links/LDC96S35/CALLHOME/SPANISH/SPEECH/TRAIN ]; -then - echo "Dev, Eval or Train directories missing or not properly organised within the speech data dir" - exit 1; -fi - -#Check the transcripts directories as well to see if they exist -if [ ! -d links/LDC96T17/callhome_spanish_trans_970711/transcrp/devtest -o ! 
-d links/LDC96T17/callhome_spanish_trans_970711/transcrp/evltest -o ! -d links/LDC96T17/callhome_spanish_trans_970711/transcrp/train ] -then - echo "Transcript directories missing or not properly organised" - exit 1; -fi - -speech_train=$dir/links/LDC96S35/CALLHOME/SPANISH/SPEECH/TRAIN -speech_dev=$dir/links/LDC96S35/CALLHOME/SPANISH/SPEECH/DEVTEST -speech_test=$dir/links/LDC96S35/CALLHOME/SPANISH/SPEECH/EVLTEST -transcripts_train=$dir/links/LDC96T17/callhome_spanish_trans_970711/transcrp/train -transcripts_dev=$dir/links/LDC96T17/callhome_spanish_trans_970711/transcrp/devtest -transcripts_test=$dir/links/LDC96T17/callhome_spanish_trans_970711/transcrp/evltest - -fcount_train=`find ${speech_train} -iname '*.SPH' | wc -l` -fcount_dev=`find ${speech_dev} -iname '*.SPH' | wc -l` -fcount_test=`find ${speech_test} -iname '*.SPH' | wc -l` -fcount_t_train=`find ${transcripts_train} -iname '*.txt' | wc -l` -fcount_t_dev=`find ${transcripts_dev} -iname '*.txt' | wc -l` -fcount_t_test=`find ${transcripts_test} -iname '*.txt' | wc -l` - -#Now check if we got all the files that we needed -if [ $fcount_train != 80 -o $fcount_dev != 20 -o $fcount_test != 20 -o $fcount_t_train != 80 -o $fcount_t_dev != 20 -o $fcount_t_test != 20 ]; -then - echo "Incorrect number of files in the data directories" - echo "The paritions should contain 80/20/20 files" - exit 1; -fi - -if [ $stage -le 0 ]; then - #Gather all the speech files together to create a file list - ( - find $speech_train -iname '*.sph'; - find $speech_dev -iname '*.sph'; - find $speech_test -iname '*.sph'; - ) > $tmpdir/callhome_train_sph.flist - - #Get all the transcripts in one place - - ( - find $transcripts_train -iname '*.txt'; - find $transcripts_dev -iname '*.txt'; - find $transcripts_test -iname '*.txt'; - ) > $tmpdir/callhome_train_transcripts.flist - -fi - -if [ $stage -le 1 ]; then - $local/callhome_make_trans.pl $tmpdir - mkdir -p $dir/callhome_train_all - mv $tmpdir/callhome_reco2file_and_channel $dir/callhome_train_all/ -fi - -if [ $stage -le 2 ]; then - sort $tmpdir/callhome.text.1 | sed 's/^\s\s*|\s\s*$//g' | sed 's/\s\s*/ /g' > $dir/callhome_train_all/callhome.text - - #Create segments file and utt2spk file - ! 
cat $dir/callhome_train_all/callhome.text | perl -ane 'm:([^-]+)-([AB])-(\S+): || die "Bad line $_;"; print "$1-$2-$3 $1-$2\n"; ' > $dir/callhome_train_all/callhome_utt2spk \ - && echo "Error producing utt2spk file" && exit 1; - - cat $dir/callhome_train_all/callhome.text | perl -ane 'm:((\S+-[AB])-(\d+)-(\d+))\s: || die; $utt = $1; $reco = $2; - $s = sprintf("%.2f", 0.01*$3); $e = sprintf("%.2f", 0.01*$4); print "$utt $reco $s $e\n"; ' >$dir/callhome_train_all/callhome_segments - - $utils/utt2spk_to_spk2utt.pl <$dir/callhome_train_all/callhome_utt2spk > $dir/callhome_train_all/callhome_spk2utt -fi - -if [ $stage -le 3 ]; then - for f in `cat $tmpdir/callhome_train_sph.flist`; do - # convert to absolute path - make_absolute.sh $f - done > $tmpdir/callhome_train_sph_abs.flist - - cat $tmpdir/callhome_train_sph_abs.flist | perl -ane 'm:/([^/]+)\.SPH$: || die "bad line $_; "; print lc($1)," $_"; ' > $tmpdir/callhome_sph.scp - cat $tmpdir/callhome_sph.scp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2);}' | \ - sort -k1,1 -u > $dir/callhome_train_all/callhome_wav.scp || exit 1; -fi - -if [ $stage -le 4 ]; then - # Build the speaker to gender map, the temporary file with the speaker in gender information is already created by fsp_make_trans.pl. - cd $cdir - #TODO: needs to be rewritten - $local/callhome_make_spk2gender.sh > $dir/callhome_train_all/callhome_spk2gender -fi - -# Rename files from the callhome directory -if [ $stage -le 5 ]; then - cd $dir/callhome_train_all - mv callhome.text text - mv callhome_segments segments - mv callhome_spk2utt spk2utt - mv callhome_wav.scp wav.scp - mv callhome_reco2file_and_channel reco2file_and_channel - mv callhome_spk2gender spk2gender - mv callhome_utt2spk utt2spk - cd $cdir -fi - -fix_data_dir.sh $dir/callhome_train_all || exit 1 -utils/validate_data_dir.sh --no-feats $dir/callhome_train_all || exit 1 - -echo "CALLHOME spanish Data preparation succeeded." - -exit 0; diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_get_1_best.py b/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_get_1_best.py deleted file mode 100755 index a81818c2858..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_get_1_best.py +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2014 Gaurav Kumar. 
Apache 2.0 -# Extracts one best output for a set of files -# The list of files in the conversations for which 1 best output has to be extracted -# words.txt - -import os -import sys - -def findTranscription(timeDetail): - file1 = open('exp/tri5a/decode_callhome_dev/scoring/13.tra') - file2 = open('exp/tri5a/decode_callhome_train/scoring/13.tra') - for line in file1: - lineComp = line.split() - if lineComp[0] == timeDetail: - return " ".join(lineComp[1:]) - for line in file2: - lineComp = line.split() - if lineComp[0] == timeDetail: - return " ".join(lineComp[1:]) - # No result found - return -1 - - -wordsFile = open('exp/tri5a/graph/words.txt') -words = {} - -# Extract word list -for line in wordsFile: - lineComp = line.split() - words[int(lineComp[1])] = lineComp[0].strip() - -# Now read list of files in conversations -fileList = [] -#conversationList = open('/export/a04/gkumar/corpora/fishcall/joshkal-splits/provisional_dev') -conversationList = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-callhome/train') -for line in conversationList: - line = line.strip() - line = line[:-4] - fileList.append(line) - -# IN what order were the conversations added to the spanish files? -# TODO: Make sure they match the order in which these english files are being written - -# Now get timing information to concatenate the ASR outputs -if not os.path.exists('exp/tri5a/one-best/ch_train'): - os.makedirs('exp/tri5a/one-best/ch_train') - -#provFile = open('/export/a04/gkumar/corpora/fishcall/fisher_provisional_dev.es', 'w+') -provFile = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-callhome/asr.train', 'w+') -for item in fileList: - timingFile = open('/export/a04/gkumar/corpora/fishcall/callhome/tim/' + item + '.es') - newFile = open('exp/tri5a/one-best/ch_train/' + item + '.es', 'w+') - for line in timingFile: - timeInfo = line.split() - mergedTranslation = "" - for timeDetail in timeInfo: - #Locate this in ASR dev/test, this is going to be very slow - tmp = findTranscription(timeDetail) - if tmp != -1: - mergedTranslation = mergedTranslation + " " + tmp - mergedTranslation = mergedTranslation.strip() - transWords = [words[int(x)] for x in mergedTranslation.split()] - newFile.write(" ".join(transWords) + "\n") - provFile.write(" ".join(transWords) + "\n") - - newFile.close() -provFile.close() - - - - - - diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_get_lattices.py b/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_get_lattices.py deleted file mode 100755 index 4c96e01ce7e..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_get_lattices.py +++ /dev/null @@ -1,115 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2014 Gaurav Kumar. 
Apache 2.0 -# Extracts one best output for a set of files -# The list of files in the conversations for which 1 best output has to be extracted -# words.txt - -from __future__ import print_function -import os -import sys -import subprocess - -latticeLocation = 'latjosh-2-callhome/lattices-pushed/' - -tmpdir = 'data/local/data/tmp/ch-d/lattmp' -invalidplfdir = 'data/local/data/tmp/ch-d/invalidplf' -symtable = '/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-matt/data/lang/words.clean.txt' - -conversationList = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-callhome/dev') -provFile = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-callhome/ch-d/asr.test.plf', 'w+') -invalidPLF = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-callhome/ch-d/invalidPLF', 'w+') -blankPLF = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-callhome/ch-d/blankPLF', 'w+') -rmLines = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-callhome/ch-d/removeLines', 'w+') - -if not os.path.exists(tmpdir): - os.makedirs(tmpdir) -if not os.path.exists(invalidplfdir): - os.makedirs(invalidplfdir) -else: - os.system("rm " + invalidplfdir + "/*") - -def latticeConcatenate(lat1, lat2): - ''' - Concatenates lattices, writes temporary results to tmpdir - ''' - if lat1 == "": - os.system('rm ' + tmpdir + '/tmp.lat') - return lat2 - else: - proc = subprocess.Popen(['fstconcat', lat1, lat2, (tmpdir + '/tmp.lat')]) - proc.wait() - return tmpdir + '/tmp.lat' - - -def findLattice(timeDetail): - ''' - Finds the lattice corresponding to a time segment - ''' - if os.path.isfile(latticeLocation + timeDetail + '.lat'): - return latticeLocation + timeDetail + '.lat' - else: - return -1 - - -# Now read list of files in conversations -fileList = [] -for line in conversationList: - line = line.strip() - line = line[:-4] - fileList.append(line) - -# IN what order were the conversations added to the spanish files? 
-# Now get timing information to concatenate the ASR outputs - -lineNo = 1 -for item in fileList: - timingFile = open('/export/a04/gkumar/corpora/fishcall/callhome/tim/' + item + '.es') - for line in timingFile: - timeInfo = line.split() - - # For utterances that are concatenated in the translation file, - # the corresponding FSTs have to be translated as well - mergedTranslation = "" - for timeDetail in timeInfo: - tmp = findLattice(timeDetail) - if tmp != -1: - # Concatenate lattices - mergedTranslation = latticeConcatenate(mergedTranslation, tmp) - - print(mergedTranslation) - if mergedTranslation != "": - - # Sanjeev's Recipe : Remove epsilons and topo sort - finalFST = tmpdir + "/final.fst" - os.system("fstrmepsilon " + mergedTranslation + " | fsttopsort - " + finalFST) - - # Now convert to PLF - proc = subprocess.Popen('/export/a04/gkumar/corpora/fishcall/bin/fsm2plf.sh ' + symtable + ' ' + finalFST, stdout=subprocess.PIPE, shell=True) - PLFline = proc.stdout.readline() - finalPLFFile = tmpdir + "/final.plf" - finalPLF = open(finalPLFFile, "w+") - finalPLF.write(PLFline) - finalPLF.close() - - # now check if this is a valid PLF, if not write it's ID in a - # file so it can be checked later - proc = subprocess.Popen("/export/a04/gkumar/moses/mosesdecoder/checkplf < " + finalPLFFile + " 2>&1 | awk 'FNR == 2 {print}'", stdout=subprocess.PIPE, shell=True) - line = proc.stdout.readline() - print("{} {}".format(line, lineNo)) - if line.strip() != "PLF format appears to be correct.": - os.system("cp " + finalFST + " " + invalidplfdir + "/" + timeInfo[0]) - invalidPLF.write(invalidplfdir + "/" + timeInfo[0] + "\n") - rmLines.write("{}\n".format(lineNo)) - else: - provFile.write(PLFline) - else: - blankPLF.write(timeInfo[0] + "\n") - rmLines.write("{}\n".format(lineNo)) - # Now convert to PLF - lineNo += 1 - -provFile.close() -invalidPLF.close() -blankPLF.close() -rmLines.close() diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_make_spk2gender.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_make_spk2gender.sh deleted file mode 100755 index d06e5fe911f..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_make_spk2gender.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2014 Gaurav Kumar. Apache 2.0 -# Gets the unique speakers from the file created by fsp_make_trans.pl -# Note that if a speaker appears multiple times, it is categorized as female - -import os -import sys - -tmpFileLocation = 'data/local/tmp/callhome_spk2gendertmp' - -tmpFile = None - -try: - tmpFile = open(tmpFileLocation) -except IOError: - print 'The file spk2gendertmp does not exist. Run fsp_make_trans.pl first?' - -speakers = {} - -for line in tmpFile: - comp = line.split(' ') - if comp[0] in speakers: - speakers[comp[0]] = "f" - else: - speakers[comp[0]] = comp[1] - -for speaker, gender in speakers.iteritems(): - print speaker + " " + gender diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_make_trans.pl b/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_make_trans.pl deleted file mode 100755 index ec3dfd88037..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_make_trans.pl +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2014 Gaurav Kumar. 
Apache 2.0 - -use utf8; -use File::Basename; - -($tmpdir)=@ARGV; -$trans="$tmpdir/callhome_train_transcripts.flist"; -$reco="$tmpdir/callhome_reco2file_and_channel"; -open(T, "<", "$trans") || die "Can't open transcripts file"; -open(R, "|sort >$reco") || die "Can't open reco2file_and_channel file $!"; -open(O, ">$tmpdir/callhome.text.1") || die "Can't open text file for writing"; -open(G, ">$tmpdir/callhome_spk2gendertmp") || die "Can't open the speaker to gender map file"; -binmode(O, ":utf8"); -while () { - $file = $_; - m:([^/]+)\.txt: || die "Bad filename $_"; - $call_id = $1; - print R "$call_id-A $call_id A\n"; - print R "$call_id-B $call_id B\n"; - open(I, "<$file") || die "Opening file $_"; - binmode(I, ":iso88591"); - #Now read each line and extract information - while () { - #136.37 138.10 B: Ah, bueno, mamita. - chomp; - - my @stringComponents = split(":", $_, 2); - my @timeInfo = split(" ", $stringComponents[0]); - $stringComponents[1] =~ s/^\s+|\s+$//g ; - my $words = $stringComponents[1]; - #Check number of components in this array - if ((scalar @stringComponents) >= 2) { - $start = sprintf("%06d", $timeInfo[0] * 100); - $end = sprintf("%06d", $timeInfo[1] * 100); - length($end) > 6 && die "Time too long $end in $file"; - $side = "A"; - if (index($timeInfo[2], "B") != -1) { - $side = "B"; - } - $utt_id = "${call_id}-$side-$start-$end"; - $speaker_id = "${call_id}-$side"; - # All speakers are treated as male because speaker gender info - # is missing in this file - $gender = "m"; - print G "$speaker_id $gender\n" || die "Error writing to speaker2gender file"; - $words =~ s|\[\[[^]]*\]\]||g; #removes comments - $words =~ s|\{laugh\}|\$laughter\$|g; # replaces laughter tmp - $words =~ s|\[laugh\]|\$laughter\$|g; # replaces laughter tmp - $words =~ s|\{[^}]*\}|\[noise\]|g; # replaces noise - $words =~ s|\[[^]]*\]|\[noise\]|g; # replaces noise - $words =~ s|\[/*([^]]*)\]|\[noise\]|g; # replaces end of noise - $words =~ s|\$laughter\$|\[laughter\]|g; # replaces laughter again - $words =~ s|\(\(([^)]*)\)\)|\1|g; # replaces unintelligible speech - $words =~ s|<\?([^>]*)>|\1|g; # for unrecognized language - $words =~ s|background speech|\[noise\]|g; - $words =~ s|background noise|\[noise\]|g; - $words =~ s/\[/larrow/g; - $words =~ s/\]/rarrow/g; - $words =~ s/[[:punct:]]//g; - $words =~ s/larrow/\[/g; - $words =~ s/rarrow/\]/g; - $words =~ s/[¿¡]//g; - $words =~ s/\h+/ /g; # horizontal whitespace characters - $words = lc($words); - print O "$utt_id $words\n" || die "Error writing to text file"; - } - } - close(I); -} -close(T); -close(R); -close(O); -close(G); diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_text_pp.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_text_pp.sh deleted file mode 100755 index 37e1eca1687..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/callhome_text_pp.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2014 Gaurav Kumar. Apache 2.0 - -if [ $# -gt 0 ]; then - sentence=$1 - echo $sentence | sed 's:{^[}]*}:[noise]:' -fi - - diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/chain/run_tdnn_1g.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/chain/run_tdnn_1g.sh deleted file mode 100755 index 2f478419a18..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/chain/run_tdnn_1g.sh +++ /dev/null @@ -1,294 +0,0 @@ -#!/bin/bash - -# 1g is like 1f but upgrading to a "resnet-style TDNN-F model", i.e. -# with bypass resnet connections, and re-tuned. 
-# compute-wer --text --mode=present ark:exp/chain/multipsplice_tdnn/decode_fsp_train_test/scoring_kaldi/test_filt.txt ark,p:- -# %WER 22.21 [ 8847 / 39831, 1965 ins, 2127 del, 4755 sub ] -# %SER 56.98 [ 3577 / 6278 ] -# Scored 6278 sentences, 0 not present in hyp. - -# steps/info/chain_dir_info.pl exp/chain/multipsplice_tdnn -# exp/chain/multipsplice_tdnn: num-iters=296 nj=1..2 num-params=8.2M dim=40+100->2489 combine=-0.170->-0.165 (over 8) xent:train/valid[196,295,final]=(-2.30,-1.93,-1.83/-2.24,-1.96,-1.86) logprob:train/valid[196,295,final]=(-0.208,-0.169,-0.164/-0.189,-0.161,-0.158) - -set -e -o pipefail - -# First the options that are passed through to run_ivector_common.sh -# (some of which are also used in this script directly). -stage=0 -nj=30 -train_set=train -test_sets="test dev" -gmm=tri5a # this is the source gmm-dir that we'll use for alignments; it - # should have alignments for the specified training data. -num_threads_ubm=32 -nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. - -# Options which are not passed through to run_ivector_common.sh -affix=1g #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. -common_egs_dir= -reporting_email= -gigaword_workdir= - -# LSTM/chain options -train_stage=-20 -xent_regularize=0.1 -dropout_schedule='0,0@0.20,0.3@0.50,0' - -# training chunk-options -chunk_width=140,100,160 -# we don't need extra left/right context for TDNN systems. -chunk_left_context=0 -chunk_right_context=0 - -# training options -srand=0 -remove_egs=true - -#decode options -test_online_decoding=false # if true, it will run the last decoding stage. - -# End configuration section. -echo "$0 $@" # Print the command line for logging - - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - - -if ! cuda-compiled; then - cat <$lang/topo - fi -fi - -if [ $stage -le 17 ]; then - # Get the alignments as lattices (gives the chain training more freedom). - # use the same num-jobs as the alignments - steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ - data/lang $gmm_dir $lat_dir - rm $lat_dir/fsts.*.gz # save space -fi - -if [ $stage -le 18 ]; then - # Build a tree using our new topology. We know we have alignments for the - # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use - # those. The num-leaves is always somewhat less than the num-leaves from - # the GMM baseline. - if [ -f $tree_dir/final.mdl ]; then - echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
- exit 1; - fi - steps/nnet3/chain/build_tree.sh \ - --frame-subsampling-factor 3 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ - $lang $ali_dir $tree_dir -fi - - -if [ $stage -le 19 ]; then - mkdir -p $dir - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - tdnn_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim-continuous=true" - tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" - linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" - prefinal_opts="l2-regularize=0.01" - output_opts="l2-regularize=0.005" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-dropout-layer name=tdnn1 $tdnn_opts dim=1024 - tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 - tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 - tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 - tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=0 - tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - linear-component name=prefinal-l dim=192 $linear_opts - - - prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1024 small-dim=192 - output-layer name=output include-log-softmax=false dim=$num_targets $output_opts - - prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1024 small-dim=192 - output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts -EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - - -if [ $stage -le 20 ]; then - if [[ $(hostname -f) == *.clsp.joujhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage - fi - - steps/nnet3/chain/train.py --stage=$train_stage \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $train_ivector_dir \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.0 \ - --chain.apply-deriv-weights false \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.srand $srand \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs 4 \ - --trainer.frames-per-iter 5000000 \ - --trainer.optimization.num-jobs-initial 1 \ - --trainer.optimization.num-jobs-final=2 \ - --trainer.optimization.initial-effective-lrate 0.0005 \ - --trainer.optimization.final-effective-lrate 0.00005 \ - --trainer.num-chunk-per-minibatch 128,64 \ - --trainer.optimization.momentum 0.0 \ - --egs.chunk-width $chunk_width \ - --egs.chunk-left-context 0 \ - --egs.chunk-right-context 0 \ - --egs.dir "$common_egs_dir" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --cleanup.remove-egs $remove_egs \ - --use-gpu true \ - --feat-dir $train_data_dir \ - --tree-dir $tree_dir \ - --lat-dir exp/tri5a_lats_nodup_sp \ - --dir $dir || exit 1; -fi - -if [ $stage -le 21 ]; then - # The reason we are using data/lang_test here, instead of $lang, is just to - # emphasize that it's not actually important to give mkgraph.sh the - # lang directory with the matched topology (since it gets the - # topology file from the model). So you could give it a different - # lang directory, one that contained a wordlist and LM of your choice, - # as long as phones.txt was compatible. - #LM was trained only on Fisher Spanish train subset. - - utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test \ - $tree_dir $tree_dir/graph_fsp_train || exit 1; - -fi - -# Let's train first a small RNNLM on Fisher train set -rnnlmdir=exp/rnnlm_lstm_tdnn_1b -if [ $stage -le 22 ]; then - rnnlm/train_rnnlm.sh --dir $rnnlmdir || exit 1; -fi - -if [ $stage -le 23 ]; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - rm $dir/.error 2>/dev/null || true - - for data in $test_sets; do - ( - nspk=$(wc -l ") - print(" Processes the text before text normalisation to convert uppercase words as space separated letters") - sys.exit() - -inputfile=codecs.open(sys.argv[1], encoding='utf-8') -outputfile=codecs.open(sys.argv[2], encoding='utf-8', mode='w') - -for line in inputfile: - words = line.split() - textout = "" - wordcnt = 0 - for word in words: - if re.match(r"\b([A-ZÂÁÀÄÊÉÈËÏÍÎÖÓÔÖÚÙÛÑÇ])+[']?s?\b", word): - if wordcnt > 0: - word = re.sub('\'?s', 's', word) - textout = textout + " ".join(word) + " " - else: - textout = textout + word + " " - else: - textout = textout + word + " " - if word.isalpha(): wordcnt = wordcnt + 1 - outputfile.write(textout.strip()+ '\n') - -inputfile.close() -outputfile.close() diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/clean_txt_dir.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/clean_txt_dir.sh deleted file mode 100755 index 1880b3a90cb..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/clean_txt_dir.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash - -# Script to clean up gigaword LM text -# Removes punctuations, does case normalization - -stage=0 -nj=500 - -. ./path.sh -. ./cmd.sh -. 
./utils/parse_options.sh - -set -euo pipefail - -if [ $# -ne 2 ]; then - echo "Usage: $0 " - exit 1; -fi - -if [ ! -s `which normalizer_main` ] ; then - echo "Sparrowhawk normalizer was not found installed !" - echo "Go to $KALDI_ROOT/tools and execute install_sparrowhawk.sh and try again!" - exit 1 -fi - -txtdir=$1 -textdir=$(realpath $txtdir) -outdir=$(realpath $2) - -workdir=$outdir/tmp -if [ $stage -le 0 ]; then - rm -rf $outdir - mkdir -p $workdir - mkdir -p $textdir/splits - mkdir -p $outdir/data - split -l 1000000 $textdir/in.txt $textdir/splits/out - numsplits=0 - for x in $textdir/splits/*; do - numsplits=$((numsplits+1)) - ln -s $x $outdir/data/$numsplits - done - echo $numsplits - cp $SPARROWHAWK_ROOT/documentation/grammars/sentence_boundary_exceptions.txt . - $train_cmd --max_jobs_run 100 JOB=1:$numsplits $outdir/sparrowhawk/log/JOB.log \ - local/run_norm.sh \ - sparrowhawk_configuration.ascii_proto \ - $SPARROWHAWK_ROOT/language-resources/en/sparrowhawk/ \ - $outdir/data \ - JOB \ - $outdir/sparrowhawk/ - cat $outdir/sparrowhawk/*.txt | sed "/^$/d" > $outdir/text_normalized - - # check if numbers are there in normalized output - awk '{for(i=1;i<=NF;i++) {if (!seen[$i]) {print $i; seen[$i]=1} }}' \ - $outdir/text_normalized > $outdir/unique_words - grep "[0-9]" $outdir/unique_words | sort -u > $outdir/numbers -fi diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/create_oracle_ctm.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/create_oracle_ctm.sh deleted file mode 100755 index d48a96db5c4..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/create_oracle_ctm.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2014 Gaurav Kumar. Apache 2.0 - -# No sanity checks here, they need to be added - -data=data/callhome_test -dir=exp/tri5a/decode_callhome_test -lang=data/lang -LMWT=13 - -[ -f ./path.sh ] && . ./path.sh - -cmd=run.pl -filter_cmd="utils/convert_ctm.pl $data/segments $data/reco2file_and_channel" -name=`basename $data`; -model=$dir/../final.mdl # assume model one level up from decoding dir. -symTable=$lang/words.txt - -if [ ! -f $dir/oracle/oracle.lat.gz ]; then - cat $data/text | utils/sym2int.pl --map-oov [oov] -f 2- $symTable | \ - lattice-oracle --write-lattices="ark:|gzip -c > $dir/oracle/oracle.lat.gz" \ - "ark:gunzip -c $dir/lat.*.gz|" ark:- ark:- > /dev/null 2>&1 -fi - -lattice-align-words $lang/phones/word_boundary.int $model \ - "ark:gunzip -c $dir/oracle/oracle.lat.gz|" ark:- | \ - lattice-1best --lm-scale=$LMWT ark:- ark:- | nbest-to-ctm ark:- - | \ - utils/int2sym.pl -f 5 $lang/words.txt | \ - utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ - > $dir/oracle/$name.ctm diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/create_splits.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/create_splits.sh deleted file mode 100755 index 8a60dc9d422..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/create_splits.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2014 Gaurav Kumar. 
Apache 2.0 - -data_dir=data -train_all=data/train_all - -if [ $# -lt 1 ]; then - echo "Specify the location of the split files" - exit 1; -fi - -splitFile=$1 - -# Train first -for split in train dev test dev2 -do - - cp -r $train_all $data_dir/$split - - awk 'BEGIN {FS=" "}; FNR==NR { a[$1]; next } ((substr($2,0,length($2)-2) ".sph") in a)' \ - $splitFile/$split $train_all/segments > $data_dir/$split/segments - - n=`awk 'BEGIN {FS = " "}; {print substr($2,0,length($2)-2)}' $data_dir/$split/segments | sort | uniq | wc -l` - - echo "$n conversations left in split $split" - - utils/fix_data_dir.sh $data_dir/$split - utils/validate_data_dir.sh $data_dir/$split -done - diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/ctm.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/ctm.sh deleted file mode 100755 index 7d09f574580..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/ctm.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2014 Gaurav Kumar. Apache 2.0 - -. ./cmd.sh - -split=test -data_dir=data/test -decode_dir=exp/sgmm2x_6a_mmi_b0.2/decode_test_fmllr_it4/ -lang_dir=data/lang - -# Create the STM file -# Always create this file before creating the CTM files so that -# channel numbers are properly created. -if [ ! -f $data_dir/stm ]; then - /export/a11/guoguo/babel/103-bengali-limitedLP.official/local/prepare_stm.pl $data_dir -fi - -# Create the CTM file -steps/get_ctm.sh $data_dir $lang_dir $decode_dir - -# Make sure that channel markers match -#sed -i "s:\s.*_fsp-([AB]): \1:g" data/dev/stm -#ls exp/tri5a/decode_dev/score_*/dev.ctm | xargs -I {} sed -i -r 's:fsp\s1\s:fsp A :g' {} -#ls exp/tri5a/decode_dev/score_*/dev.ctm | xargs -I {} sed -i -r 's:fsp\s2\s:fsp B :g' {} - -# Get the environment variables -. /export/babel/data/software/env.sh - -# Start scoring -/export/a11/guoguo/babel/103-bengali-limitedLP.official/local/score_stm.sh $data_dir $lang_dir \ - $decode_dir - -# Print a summary of the result -grep "Percent Total Error" $decode_dir/score_*/$split.ctm.dtl diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/decode_report.py b/egs/fisher_callhome_spanish/s5_gigaword/local/decode_report.py deleted file mode 100755 index 6f3d3f80c95..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/decode_report.py +++ /dev/null @@ -1,148 +0,0 @@ -#!/usr/bin/env python - -# Author : Gaurav Kumar (Johns Hopkins University) -# Gets a report on what the best word error rate was and which iteration -# led to it. This is needed both for reporting purposes and for setting -# the acoustic scale weight which extracting lattices. 
-# This script is specific to my partitions and needs to be made more general -# or modified - -from __future__ import print_function -import subprocess -import os - -decode_directories = ['exp/tri5a/decode_dev', - 'exp/tri5a/decode_test', - 'exp/tri5a/decode_dev2', - 'exp/sgmm2x_6a/decode_dev_fmllr', - 'exp/sgmm2x_6a/decode_test_fmllr', - 'exp/sgmm2x_6a/decode_dev2_fmllr', - 'exp/sgmm2x_6a_mmi_b0.2/decode_dev_it1', - 'exp/sgmm2x_6a_mmi_b0.2/decode_dev_it2', - 'exp/sgmm2x_6a_mmi_b0.2/decode_dev_it3', - 'exp/sgmm2x_6a_mmi_b0.2/decode_dev_it4', - 'exp/sgmm2x_6a_mmi_b0.2/decode_dev2_it1', - 'exp/sgmm2x_6a_mmi_b0.2/decode_dev2_it2', - 'exp/sgmm2x_6a_mmi_b0.2/decode_dev2_it3', - 'exp/sgmm2x_6a_mmi_b0.2/decode_dev2_it4', - 'exp/sgmm2x_6a_mmi_b0.2/decode_test_it1', - 'exp/sgmm2x_6a_mmi_b0.2/decode_test_it2', - 'exp/sgmm2x_6a_mmi_b0.2/decode_test_it3', - 'exp/sgmm2x_6a_mmi_b0.2/decode_test_it4', - 'exp/sgmm2x_6a_mmi_b0.2/decode_dev_fmllr_it1', - 'exp/sgmm2x_6a_mmi_b0.2/decode_dev_fmllr_it2', - 'exp/sgmm2x_6a_mmi_b0.2/decode_dev_fmllr_it3', - 'exp/sgmm2x_6a_mmi_b0.2/decode_dev_fmllr_it4', - 'exp/sgmm2x_6a_mmi_b0.2/decode_dev2_fmllr_it1', - 'exp/sgmm2x_6a_mmi_b0.2/decode_dev2_fmllr_it2', - 'exp/sgmm2x_6a_mmi_b0.2/decode_dev2_fmllr_it3', - 'exp/sgmm2x_6a_mmi_b0.2/decode_dev2_fmllr_it4', - 'exp/sgmm2x_6a_mmi_b0.2/decode_test_fmllr_it1', - 'exp/sgmm2x_6a_mmi_b0.2/decode_test_fmllr_it2', - 'exp/sgmm2x_6a_mmi_b0.2/decode_test_fmllr_it3', - 'exp/sgmm2x_6a_mmi_b0.2/decode_test_fmllr_it4' - ] - -def get_best_wer(decode_dir): - best_iteration = 0 - best_wer = 100.0 - for i in range(16): - if os.path.isfile("{}/wer_{}".format(decode_dir, i)): - result = subprocess.check_output("tail -n 3 {}/wer_{}".format(decode_dir, i), shell=True) - wer_string = result.split("\n")[0] - wer_details = wer_string.split(' ') - # Get max WER - wer = float(wer_details[1]) - if wer < best_wer: - best_wer = wer - best_iteration = i - return best_iteration, best_wer - -for decode_dir in decode_directories[:6]: - print(decode_dir) - print(get_best_wer(decode_dir)) - -# Separate processing for bMMI stuff -best_wer = 100.0 -best_dir = "" -best_iteration = 0 - -for decode_dir in decode_directories[6:10]: - iteration, wer = get_best_wer(decode_dir) - if wer < best_wer: - best_wer = wer - best_dir = decode_dir - best_iteration = iteration - -print(best_dir) -print((best_iteration, best_wer)) - -best_wer = 100.0 -best_dir = "" -best_iteration = 0 - -for decode_dir in decode_directories[10:14]: - iteration, wer = get_best_wer(decode_dir) - if wer < best_wer: - best_wer = wer - best_dir = decode_dir - best_iteration = iteration - -print(best_dir) -print((best_iteration, best_wer)) - -best_wer = 100.0 -best_dir = "" -best_iteration = 0 - -for decode_dir in decode_directories[14:18]: - iteration, wer = get_best_wer(decode_dir) - if wer < best_wer: - best_wer = wer - best_dir = decode_dir - best_iteration = iteration - -print(best_dir) -print((best_iteration, best_wer)) - -best_wer = 100.0 -best_dir = "" -best_iteration = 0 - -for decode_dir in decode_directories[18:22]: - iteration, wer = get_best_wer(decode_dir) - if wer <= best_wer: - best_wer = wer - best_dir = decode_dir - best_iteration = iteration - -print(best_dir) -print((best_iteration, best_wer)) - -best_wer = 100.0 -best_dir = "" -best_iteration = 0 - -for decode_dir in decode_directories[22:26]: - iteration, wer = get_best_wer(decode_dir) - if wer <= best_wer: - best_wer = wer - best_dir = decode_dir - best_iteration = iteration - -print(best_dir) -print((best_iteration, best_wer)) 
- -best_wer = 100.0 -best_dir = "" -best_iteration = 0 - -for decode_dir in decode_directories[26:]: - iteration, wer = get_best_wer(decode_dir) - if wer <= best_wer: - best_wer = wer - best_dir = decode_dir - best_iteration = iteration - -print(best_dir) -print((best_iteration, best_wer)) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/find_unique_phones.pl b/egs/fisher_callhome_spanish/s5_gigaword/local/find_unique_phones.pl deleted file mode 100755 index 2da41182d20..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/find_unique_phones.pl +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env perl -#Finds unique phones from the basic rules file -# Copyright 2014 Gaurav Kumar. Apache 2.0 - -use utf8; - -($b)=$ARGV[0]; -($tmpdir)=$ARGV[1]; -open(BB, "<", "$b/basic_rules") || die "Can't open basic rules"; -binmode(BB, ":iso88591"); -open(O, ">$tmpdir/phones") || die "Can't open text file for writing"; -binmode(O, ":utf8"); -my %phones = qw(); -while () { - chomp; - my @stringComponents = split(/\t/); - m/->\s(\S+)/; - my $phone = $1; - $phone =~ tr/áéíóú/aeiou/; - $phones{$phone} = 1; -} -foreach my $p (keys %phones) { - print O $p, "\n"; -} -#print keys %phones; diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/fix_stm.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/fix_stm.sh deleted file mode 100755 index 20220d107bc..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/fix_stm.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env bash - -# Fixes the CALLHOME stm files -# Copyright 2014 Gaurav Kumar. Apache 2.0 - -data_dir=$1 - -cat $data_dir/stm | awk '{$1=substr(tolower($1),0,length($1)-4);print;}' > $data_dir/stm_new -mv $data_dir/stm $data_dir/stm.bak -mv $data_dir/stm_new $data_dir/stm diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/flatten_all_gigaword.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/flatten_all_gigaword.sh deleted file mode 100755 index 242359e7c28..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/flatten_all_gigaword.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env bash -set -e - -# Path to Gigaword corpus with all data files decompressed. -export GIGAWORDDIR=$1 -# The directory to write output to -export OUTPUTDIR=$2 -# The number of jobs to run at once -export NUMJOBS=$3 - -echo "Flattening Gigaword with ${NUMJOBS} processes..." -mkdir -p $OUTPUTDIR -find ${GIGAWORDDIR}/data/*/* -type f -print -exec local/flatten_gigaword/run_flat.sh {} ${OUTPUTDIR} \; -echo "Combining the flattened files into one..." -cat ${OUTPUTDIR}/*.flat > ${OUTPUTDIR}/flattened_gigaword.txt diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/flatten_one_gigaword.py b/egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/flatten_one_gigaword.py deleted file mode 100644 index 29f6766dd84..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/flatten_one_gigaword.py +++ /dev/null @@ -1,61 +0,0 @@ -# -*- coding: utf-8 -*- - -import logging -import os -import re -import spacy -import gzip - -from argparse import ArgumentParser -from bs4 import BeautifulSoup - -en_nlp = spacy.load("es") - - -def flatten_one_gigaword_file(file_path): - f = gzip.open(file_path) - html = f.read() - # Parse the text with BeautifulSoup - soup = BeautifulSoup(html, "html.parser") - - # Iterate over all
<p>
items and get the text for each. - all_paragraphs = [] - for paragraph in soup("p"): - # Turn inter-paragraph newlines into spaces - paragraph = paragraph.get_text() - paragraph = re.sub(r"\n+", "\n", paragraph) - paragraph = paragraph.replace("\n", " ") - # Tokenize the paragraph into words - tokens = en_nlp.tokenizer(paragraph) - words = [str(token) for token in tokens if not - str(token).isspace()] - if len(words) < 3: - continue - all_paragraphs.append(words) - # Return a list of strings, where each string is a - # space-tokenized paragraph. - return [" ".join(paragraph) for paragraph in all_paragraphs] - - -if __name__ == "__main__": - log_fmt = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" - logging.basicConfig(level=logging.INFO, format=log_fmt) - logger = logging.getLogger(__name__) - - parser = ArgumentParser(description=("Flatten a gigaword data file for " - "use in language modeling.")) - parser.add_argument("--gigaword-path", required=True, - metavar="", type=str, - help=("Path to Gigaword directory, with " - "all .gz files unzipped.")) - parser.add_argument("--output-dir", required=True, metavar="", - type=str, help=("Directory to write final flattened " - "Gigaword file.")) - - A = parser.parse_args() - all_paragraphs = flatten_one_gigaword_file(A.gigaword_path) - output_path = os.path.join(A.output_dir, - os.path.basename(A.gigaword_path) + ".flat") - with open(output_path, "w") as output_file: - for paragraph in all_paragraphs: - output_file.write("{}\n".format(paragraph)) diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/run_flat.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/run_flat.sh deleted file mode 100755 index 6b236be0ab9..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/flatten_gigaword/run_flat.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env bash -set -e - -. ./path_venv.sh - -# Path to Gigaword corpus with all data files decompressed. -GIGAWORDPATH=$1 -# The directory to write output to -OUTPUTDIR=$2 -file=$(basename ${GIGAWORDPATH}) -if [ ! -e ${OUTPUTDIR}/${file}.flat ]; then - echo "flattening to ${OUTPUTDIR}/${file}.flat" - python local/flatten_gigaword/flatten_one_gigaword.py --gigaword-path ${GIGAWORDPATH} --output-dir ${OUTPUTDIR} -else - echo "skipping ${file}.flat" -fi - diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_create_test_lang.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_create_test_lang.sh deleted file mode 100755 index fb765b57e69..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_create_test_lang.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash -# Copyright 2014 Gaurav Kumar. Apache 2.0 -# - -if [ -f path.sh ]; then . ./path.sh; fi - -mkdir -p data/lang_test - -arpa_lm=data/local/lm/3gram-mincount/lm_unpruned.gz -[ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1; - -mkdir -p data/lang_test -cp -r data/lang/* data/lang_test - -gunzip -c "$arpa_lm" | \ - arpa2fst --disambig-symbol=#0 \ - --read-symbol-table=data/lang_test/words.txt - data/lang_test/G.fst - - -echo "Checking how stochastic G is (the first of these numbers should be small):" -fstisstochastic data/lang_test/G.fst - -## Check lexicon. -## just have a look and make sure it seems sane. -echo "First few lines of lexicon FST:" -fstprint --isymbols=data/lang/phones.txt --osymbols=data/lang/words.txt data/lang/L.fst | head - -echo Performing further checks - -# Checking that G.fst is determinizable. 
-fstdeterminize data/lang_test/G.fst /dev/null || echo Error determinizing G. - -# Checking that L_disambig.fst is determinizable. -fstdeterminize data/lang_test/L_disambig.fst /dev/null || echo Error determinizing L. - -# Checking that disambiguated lexicon times G is determinizable -# Note: we do this with fstdeterminizestar not fstdeterminize, as -# fstdeterminize was taking forever (presumbaly relates to a bug -# in this version of OpenFst that makes determinization slow for -# some case). -fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst | \ - fstdeterminizestar >/dev/null || echo Error - -# Checking that LG is stochastic: -fsttablecompose data/lang/L_disambig.fst data/lang_test/G.fst | \ - fstisstochastic || echo "[log:] LG is not stochastic" - - -echo "$0 succeeded" diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_data_prep.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_data_prep.sh deleted file mode 100755 index 22b98a6c9db..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_data_prep.sh +++ /dev/null @@ -1,176 +0,0 @@ -#!/bin/bash -# -# Copyright 2014 Gaurav Kumar. Apache 2.0 -# The input is the Fisher Dataset which contains DISC1 and DISC2. (*.sph files) -# In addition the transcripts are needed as well. -# To be run from one directory above this script. - -# Note: when creating your own data preparation scripts, it's a good idea -# to make sure that the speaker id (if present) is a prefix of the utterance -# id, that the output scp file is sorted on utterance id, and that the -# transcription file is exactly the same length as the scp file and is also -# sorted on utterance id (missing transcriptions should be removed from the -# scp file using e.g. scripts/filter_scp.pl) - -stage=0 - -export LC_ALL=C - - -if [ $# -lt 2 ]; then - echo "Usage: $0 " - echo "e.g.: $0 /home/mpost/data/LDC/LDC2010S01 /home/mpost/data/LDC/LDC2010T04" - exit 1; -fi - -cdir=`pwd` -dir=`pwd`/data/local/data -lmdir=`pwd`/data/local/nist_lm -mkdir -p $dir $lmdir -local=`pwd`/local -utils=`pwd`/utils -tmpdir=`pwd`/data/local/tmp -mkdir -p $tmpdir - -. ./path.sh || exit 1; # Needed for KALDI_ROOT -export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin -sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe -if [ ! -x $sph2pipe ]; then - echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; - exit 1; -fi -cd $dir - -# Make directory of links to the WSJ disks such as 11-13.1. This relies on the command -# line arguments being absolute pathnames. -rm -r links/ 2>/dev/null -mkdir links/ -ln -s $* links - -# Basic spot checks to see if we got the data that we needed -if [ ! -d links/LDC2010S01 -o ! -d links/LDC2010T04 ]; -then - echo "The speech and the data directories need to be named LDC2010S01 and LDC2010T04 respecti -vely" - exit 1; -fi - -#if [ ! -d links/LDC2010S01/DISC1/data/speech -o ! -d links/LDC2010S01/DISC2/data/speech ]; -if [ ! -d links/LDC2010S01/data/speech ]; -then - echo "Speech directories missing or not properly organised within the speech data dir" - echo "Typical format is LDC2010S01/data/speech" - exit 1; -fi - -#Check the transcripts directories as well to see if they exist -if [ ! 
-d links/LDC2010T04/fisher_spa_tr/data/transcripts ]; -then - echo "Transcript directories missing or not properly organised" - echo "Typical format is LDC2010T04/fisher_spa_tr/data/transcripts" - exit 1; -fi - -#speech_d1=$dir/links/LDC2010S01/DISC1/data/speech -#speech_d2=$dir/links/LDC2010S01/DISC2/data/speech -speech=$dir/links/LDC2010S01/data/speech -transcripts=$dir/links/LDC2010T04/fisher_spa_tr/data/transcripts - -#fcount_d1=`find ${speech_d1} -iname '*.sph' | wc -l` -#fcount_d2=`find ${speech_d2} -iname '*.sph' | wc -l` -fcount_s=`find ${speech} -iname '*.sph' | wc -l` -fcount_t=`find ${transcripts} -iname '*.tdf' | wc -l` -#TODO:it seems like not all speech files have transcripts -#Now check if we got all the files that we needed -#if [ $fcount_d1 != 411 -o $fcount_d2 != 408 -o $fcount_t != 819 ]; -if [ $fcount_s != 819 -o $fcount_t != 819 ]; -then - echo "Incorrect number of files in the data directories" - echo "DISC1 and DISC2 should contain 411 and 408 .sph files respectively (Total = 819)" - echo "The transcripts should contain 819 files" - exit 1; -fi - -if [ $stage -le 0 ]; then - #Gather all the speech files together to create a file list - #TODO: Train and test split might be required - ( - #find $speech_d1 -iname '*.sph'; - #find $speech_d2 -iname '*.sph'; - find $speech -iname '*.sph'; - ) > $tmpdir/train_sph.flist - - #Get all the transcripts in one place - find $transcripts -iname '*.tdf' > $tmpdir/train_transcripts.flist -fi - -if [ $stage -le 1 ]; then - $local/fsp_make_trans.pl $tmpdir - mkdir -p $dir/train_all - mv $tmpdir/reco2file_and_channel $dir/train_all/ -fi - -if [ $stage -le 2 ]; then - sort $tmpdir/text.1 | grep -v '((' | \ - awk '{if (NF > 1){ print; }}' | \ - sed 's:<\s*[/]*\s*\s*for[ei][ei]g[nh]\s*\w*>::g' | \ - sed 's:\([^<]*\)<\/lname>:\1:g' | \ - sed 's:::g' | \ - sed 's:[^<]*<\/laugh>:[laughter]:g' | \ - sed 's:<\s*cough[\/]*>:[noise]:g' | \ - sed 's::[noise]:g' | \ - sed 's::[noise]:g' | \ - sed 's::[noise]:g' | \ - sed 's:[^<]*<\/background>:[noise]:g' | \ - sed -r 's:<[/]?background[/]?>:[noise]:g' | \ - #One more time to take care of nested stuff - sed 's:[^<]*<\/laugh>:[laughter]:g' | \ - sed -r 's:<[/]?laugh[/]?>:[laughter]:g' | \ - #now handle the exceptions, find a cleaner way to do this? - sed 's:::g' | \ - sed 's:::g' | \ - sed 's:foreign>::g' | \ - sed 's:\[noise\]:[noise] :g' | \ - sed 's:>::g' | \ - #How do you handle numbers? - grep -v '()' | \ - #Now go after the non-printable characters and multiple spaces - sed -r 's:¿::g' | sed 's/^\s\s*|\s\s*$//g' | sed 's/\s\s*/ /g' > $tmpdir/text.2 - cp $tmpdir/text.2 $dir/train_all/text - - #Create segments file and utt2spk file - ! 
cat $dir/train_all/text | perl -ane 'm:([^-]+)-([AB])-(\S+): || die "Bad line $_;"; print "$1-$2-$3 $1-$2\n"; ' > $dir/train_all/utt2spk \ - && echo "Error producing utt2spk file" && exit 1; - - cat $dir/train_all/text | perl -ane 'm:((\S+-[AB])-(\d+)-(\d+))\s: || die; $utt = $1; $reco = $2; - $s = sprintf("%.2f", 0.01*$3); $e = sprintf("%.2f", 0.01*$4); if ($s != $e) {print "$utt $reco $s $e\n"}; ' >$dir/train_all/segments - - $utils/utt2spk_to_spk2utt.pl <$dir/train_all/utt2spk > $dir/train_all/spk2utt -fi - -if [ $stage -le 3 ]; then - for f in `cat $tmpdir/train_sph.flist`; do - # convert to absolute path - make_absolute.sh $f - done > $tmpdir/train_sph_abs.flist - - cat $tmpdir/train_sph_abs.flist | perl -ane 'm:/([^/]+)\.sph$: || die "bad line $_; "; print "$1 $_"; ' > $tmpdir/sph.scp - cat $tmpdir/sph.scp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2);}' | \ - sort -k1,1 -u > $dir/train_all/wav.scp || exit 1; -fi - -if [ $stage -le 4 ]; then - # Build the speaker to gender map, the temporary file with the speaker in gender information is already created by fsp_make_trans.pl. - cd $cdir - $local/fsp_make_spk2gender.sh > $dir/train_all/spk2gender -fi - -fix_data_dir.sh $dir/train_all || exit 1 -validate_data_dir.sh --no-feats $dir/train_all || exit 1 - -echo "Fisher Spanish Data preparation succeeded." - -exit 0; diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_ideal_data_partitions.pl b/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_ideal_data_partitions.pl deleted file mode 100755 index 538bca58981..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_ideal_data_partitions.pl +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env perl -# -# Johns Hopkins University (Author : Gaurav Kumar) -# -# This script should be run from one directory above the current one -# -# Rough partitions that are needed are : -# -# ASR Train : 120k utterances -# ASR tune : 20k utterances -# ASR eval : 20k utterances -# MT train : 105k utterances -# MT tune : Same as the ASR eval (20k utterances) -# MT eval : 20k utterances -# -# This script tries to find the closest possible matches so that conversations -# belong in one single partition and hence there is no speaker/conversation -# overlap between data partitions - -use Storable 'dclone'; - -$textfile="data/local/data/train_all/text"; -$tmp="data/local/tmp"; - -open(T, "<", "$textfile") || die "Can't open text file"; - -$ongoingConv = ""; -%tmpSplits = (); -@splitNumbers = (17455, 20000, 100000, 20000, 100000); -$splitId = 0; -%splits = (); - -while () { - @myStringComponents = split(/\s/); - @uttid = split('-', $myStringComponents[0]); - $currentConv = $uttid[0]; - if ($currentConv eq $ongoingConv) { - # Same conversation, add to current hash - #print "Same conversation"; - $tmpSplits{$ongoingConv} += 1; - } - else { - # New conversation intiated, first check if there are enough entries - # in the hash - #print $ongoingConv . " " . get_entries_hash(\%tmpSplits) . "\n"; - if (get_entries_hash(\%tmpSplits) > $splitNumbers[$splitId]) { - print "Finished processing split " . $splitId . ". It contains " . get_entries_hash(\%tmpSplits) . " entries. 
\n"; - #$splits{$splitId} = keys %tmpSplits; - @newArr = keys %tmpSplits; - $splits{$splitId} = dclone(\@newArr); - %tmpSplits = (); - $splitId += 1; - } - $ongoingConv = $currentConv; - $tmpSplits{$ongoingConv} = 1; - } -} -# Put final tmpsplits in the right partition -@newArr = keys %tmpSplits; -$splits{$splitId} = dclone(\@newArr); -foreach (keys %splits) { - #print $_ , " ", $splits{$_}, "\n"; -} -print "Finished processing split " . $splitId . ". It contains " . get_entries_hash(\%tmpSplits) . " entries. \n"; - -# Write splits to file -foreach my $key ( keys %splits ) { - open(S, ">$tmp/split-$key") || die "Can't open splitfile to write"; - foreach my $file ( @{$splits{$key}} ) { - print $file, "\n"; - print S "$file\n" || die "Error writing to file"; - } - close(S); -} - -sub get_entries_hash() { - my $inputHashRef = shift; - $total = 0; - foreach (keys %{$inputHashRef}) - { - $total += $inputHashRef->{$_}; - } - return $total; -} - diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_make_spk2gender.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_make_spk2gender.sh deleted file mode 100755 index 15b1c0064cf..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_make_spk2gender.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2014 Gaurav Kumar. Apache 2.0 -# Gets the unique speakers from the file created by fsp_make_trans.pl -# Note that if a speaker appears multiple times, it is categorized as female - -import os -import sys - -tmpFileLocation = 'data/local/tmp/spk2gendertmp' - -tmpFile = None - -try: - tmpFile = open(tmpFileLocation) -except IOError: - print 'The file spk2gendertmp does not exist. Run fsp_make_trans.pl first?' - -speakers = {} - -for line in tmpFile: - comp = line.split(' ') - if comp[0] in speakers: - speakers[comp[0]] = "f" - else: - speakers[comp[0]] = comp[1] - -for speaker, gender in speakers.iteritems(): - print speaker + " " + gender diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_make_trans.pl b/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_make_trans.pl deleted file mode 100755 index 8c3f74e3917..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_make_trans.pl +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2014 Gaurav Kumar. 
Apache 2.0 - -use utf8; -use File::Basename; -($tmpdir)=@ARGV; -#$tmpdir='../data/local/tmp'; -$trans="$tmpdir/train_transcripts.flist"; -$reco="$tmpdir/reco2file_and_channel"; -open(T, "<", "$trans") || die "Can't open transcripts file"; -open(R, "|sort >$reco") || die "Can't open reco2file_and_channel file $!"; -open(O, ">$tmpdir/text.1") || die "Can't open text file for writing"; -open(G, ">$tmpdir/spk2gendertmp") || die "Can't open the speaker to gender map file"; -binmode(O, ":utf8"); -while () { - $file = $_; - m:([^/]+)\.tdf: || die "Bad filename $_"; - $call_id = $1; - print R "$call_id-A $call_id A\n"; - print R "$call_id-B $call_id B\n"; - open(I, "<$file") || die "Opening file $_"; - binmode(I, ":utf8"); - # Get rid of header sections first - foreach ( 0..2 ) { - $tmpLine = ; - } - #Now read each line and extract information - while () { - #20051017_215732_274_fsp.sph 1 0.0 0.909856781803 Audrey female native Audrey 0 0 -1 - chomp; - my @stringComponents = split(/\t/); - - #Check number of components in this array - if ((scalar @stringComponents) >= 11) { - $start = sprintf("%06d", $stringComponents[2] * 100); - $end = sprintf("%06d", $stringComponents[3] * 100); - length($end) > 6 && die "Time too long $end in $file"; - $side = $stringComponents[1] ? "B" : "A"; - $words = $stringComponents[7]; - $utt_id = "${call_id}-$side-$start-$end"; - $speaker_id = "${call_id}-$side"; - $gender = "m"; - if ($stringComponents[5] == "female") { - $gender = "f"; - } - print G "$speaker_id $gender\n" || die "Error writing to speaker2gender file"; - $words =~ s:/rarrow/g; - $words =~ s/[[:punct:]]//g; - $words =~ s/larrow//g; - $words =~ s:lendarrow: 0){ print; }}' > $tmpdir/uniquewords - if [ ! -f "${tmpdir}/es_wordlist.json" ]; then - echo "Could not find the large collection of Spanish words es_wordlist.json" - echo "Trying to download it via wget" - - if ! which wget >&/dev/null; then - echo "This script requires you to first install wget" - exit 1; - fi - - cwd=`pwd` - cd $tmpdir - wget -T 10 -t 3 -c http://www.openslr.org/resources/21/es_wordlist.json.tgz - - if [ ! -e ${tmpdir}/es_wordlist.json.tgz ]; then - echo "Download of the large Spanish word list failed" - exit 1; - fi - - tar -xovzf es_wordlist.json.tgz || exit 1; - cd $cwd - fi - - # Merge with gigaword corpus - $local/merge_lexicons.py ${tmpdir} ${lexicon} - mv $tmpdir/uniquewords $tmpdir/uniquewords.small - mv $tmpdir/uniquewords64k $tmpdir/uniquewords -fi - -#Then get the list of phones form basic_rules in the lexicon folder -if [ $stage -le 1 ]; then - if [ ! 
-d "$lexicon/callhome_spanish_lexicon_970908" ]; then - echo "Could not find folder callhome_spanish_lexicon_970908 in the lexicon folder" - exit 1; - fi - - # This is a preliminary attempt to get the unique phones from the LDC lexicon - # This will be extended based on our lexicon later - perl $local/find_unique_phones.pl $lexicon/callhome_spanish_lexicon_970908 $tmpdir - -fi - -#Get pronunciation for each word using the spron.pl file in the lexicon folder -if [ $stage -le 2 ]; then - #cd $lexicon/callhome_spanish_lexicon_970908 - # Replace all words for which no pronunciation was generated with an orthographic - # representation - cat $tmpdir/uniquewords | $local/spron.pl $lexicon/callhome_spanish_lexicon_970908/preferences $lexicon/callhome_spanish_lexicon_970908/basic_rules \ - | cut -f1 | sed -r 's:#\S+\s\S+\s\S+\s\S+\s(\S+):\1:g' \ - | awk -F '[/][/]' '{print $1}' \ - > $tmpdir/lexicon_raw -fi - -#Break the pronunciation down according to the format required by Kaldi -if [ $stage -le 3 ]; then - # Creates a KALDI compatible lexicon, and extends the phone list - perl $local/isolate_phones.pl $tmpdir - cat $tmpdir/phones_extended | sort | awk '{if ($1 != "") {print;}}' > $tmpdir/phones_extended.1 - mv $tmpdir/phones $tmpdir/phones.small - mv $tmpdir/phones_extended.1 $tmpdir/phones - sort $tmpdir/phones -o $tmpdir/phones - paste -d ' ' $tmpdir/uniquewords $tmpdir/lexicon_one_column | sed -r 's:(\S+)\s#.*:\1 oov:g' > $tmpdir/lexicon.1 - #paste -d ' ' $tmpdir/uniquewords $tmpdir/lexicon_one_column | grep -v '#' > $tmpdir/lexicon.1 -fi - -if [ $stage -le 4 ]; then - # silence phones, one per line. - for w in sil laughter noise oov; do echo $w; done > $dir/silence_phones.txt - echo sil > $dir/optional_silence.txt - - # An extra question will be added by including the silence phones in one class. 
- cat $dir/silence_phones.txt| awk '{printf("%s ", $1);} END{printf "\n";}' > \ - $dir/extra_questions.txt || exit 1; - - # Remove [] chars from phones - cat $tmpdir/phones | awk '{if ($1 != "_" && $1 != "[" && $1 != "]") {print;}}' > $tmpdir/phones.1 - rm $tmpdir/phones - mv $tmpdir/phones.1 $tmpdir/phones - cp $tmpdir/phones $dir/nonsilence_phones.txt - - if [ -f $tmpdir/lexicon.2 ]; then rm $tmpdir/lexicon.2; fi - cp "$tmpdir/lexicon.1" "$tmpdir/lexicon.2" - - # Add prons for laughter, noise, oov - for w in `grep -v sil $dir/silence_phones.txt`; do - sed -i "/\[$w\]/d" $tmpdir/lexicon.2 - done - - for w in `grep -v sil $dir/silence_phones.txt`; do - echo "[$w] $w" - done | cat - $tmpdir/lexicon.2 > $tmpdir/lexicon.3 || exit 1; - - cat $tmpdir/lexicon.3 \ - <( echo "mm m" - echo " oov" ) > $tmpdir/lexicon.4 - - # From the lexicon remove _ from the phonetic representation - cat $tmpdir/lexicon.4 | sed 's:\s_::g' > $tmpdir/lexicon.5 - - cp "$tmpdir/lexicon.5" $dir/lexicon.txt - - cat $datadir/text | \ - awk '{for (n=2;n<=NF;n++){ count[$n]++; } } END { for(n in count) { print count[n], n; }}' | \ - sort -nr > $tmpdir/word_counts - - awk '{print $1}' $dir/lexicon.txt | \ - perl -e '($word_counts)=@ARGV; - open(W, "<$word_counts")||die "opening word-counts $word_counts"; - while() { chop; $seen{$_}=1; } - while() { - ($c,$w) = split; - if (!defined $seen{$w}) { print; } - } ' $tmpdir/word_counts > $tmpdir/oov_counts.txt - echo "*Highest-count OOVs are:" - head -n 20 $tmpdir/oov_counts.txt -fi - -$utils/validate_dict_dir.pl $dir -exit 0; diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_train_lms.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_train_lms.sh deleted file mode 100755 index cebf3b222ab..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/fsp_train_lms.sh +++ /dev/null @@ -1,140 +0,0 @@ -#!/bin/bash -# Copyright 2014 Gaurav Kumar. Apache 2.0 - -# To be run from one level above this directory -# Generate the text for the LM training -tmp_dir=data/local/tmp -train_all=data/local/data/train_all - -if [ $# -lt 1 ]; then - echo "Specify the location of the split files" - exit 1; -fi - -splitFile=$1 -split=train -# Train only -if [ -d $tmp_dir/$split ]; then - rm -r $tmp_dir/$split -fi -cp -r $train_all $tmp_dir/$split - -awk 'BEGIN {FS=" "}; FNR==NR { a[$1]; next } ((substr($2,0,length($2)-2) ".sph") in a)' \ -$splitFile/$split $train_all/segments > $tmp_dir/$split/segments - -n=`awk 'BEGIN {FS = " "}; {print substr($2,0,length($2)-2)}' $tmp_dir/$split/segments | sort | uniq | wc -l` - -echo "$n conversations left in split $split" - -utils/fix_data_dir.sh $tmp_dir/$split -# There is no feature file yet, use --no-feats switch -utils/validate_data_dir.sh --no-feats $tmp_dir/$split - -# Now use this training text - -text=$tmp_dir/train/text -lexicon=data/local/dict/lexicon.txt - -for f in "$text" "$lexicon"; do - [ ! -f $x ] && echo "$0: No such file $f" && exit 1; -done - -# This script takes no arguments. It assumes you have already run -# fisher_data_prep.sh and fisher_prepare_dict.sh -# It takes as input the files -#data/train_all/text -#data/local/dict/lexicon.txt - -dir=`pwd`/data/local/lm -mkdir -p $dir -export LC_ALL=C # You'll get errors about things being not sorted, if you -# have a different locale. -export PATH=$PATH:`pwd`/../../../tools/kaldi_lm -( # First make sure the kaldi_lm toolkit is installed. - cd ../../../tools || exit 1; - if [ -d kaldi_lm ]; then - echo Not installing the kaldi_lm toolkit since it is already there. 
- else - echo Downloading and installing the kaldi_lm tools - if [ ! -f kaldi_lm.tar.gz ]; then - wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1; - fi - tar -xvzf kaldi_lm.tar.gz || exit 1; - cd kaldi_lm - make || exit 1; - echo Done making the kaldi_lm tools - fi -) || exit 1; - -mkdir -p $dir - - -cleantext=$dir/text.no_oov - -cat $text | awk -v lex=$lexicon 'BEGIN{while((getline0){ seen[$1]=1; } } - {for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf(" ");} } printf("\n");}' \ - > $cleantext || exit 1; - - -cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \ - sort -nr > $dir/word.counts || exit 1; - - -# Get counts from acoustic training transcripts, and add one-count -# for each word in the lexicon (but not silence, we don't want it -# in the LM-- we'll add it optionally later). -cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \ - cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \ - sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1; - -# note: we probably won't really make use of as there aren't any OOVs -cat $dir/unigram.counts | awk '{print $2}' | get_word_map.pl "" "" "" > $dir/word_map \ - || exit 1; - -# note: ignore 1st field of train.txt, it's the utterance-id. -cat $cleantext | awk -v wmap=$dir/word_map 'BEGIN{while((getline0)map[$1]=$2;} - { for(n=2;n<=NF;n++) { printf map[$n]; if(n$dir/train.gz \ - || exit 1; - -train_lm.sh --arpa --lmtype 3gram-mincount $dir || exit 1; - -# Perplexity over 88307.000000 words (excluding 691.000000 OOVs) is 71.241332 - -# note: output is -# data/local/lm/3gram-mincount/lm_unpruned.gz - - -exit 0 - -echo "Baseline" - -# From here is some commands to do a baseline with SRILM (assuming -# you have it installed). -heldout_sent=158126 # Don't change this if you want result to be comparable with - # kaldi_lm results -sdir=$dir/srilm # in case we want to use SRILM to double-check perplexities. -mkdir -p $sdir -cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n $sdir/heldout -cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n $sdir/train - -cat $dir/word_map | awk '{print $1}' | cat - <(echo ""; echo "" ) > $sdir/wordlist - - -ngram-count -text $sdir/train -order 3 -limit-vocab -vocab $sdir/wordlist -unk \ - -map-unk "" -kndiscount -interpolate -lm $sdir/srilm.o3g.kn.gz -ngram -lm $sdir/srilm.o3g.kn.gz -ppl $sdir/heldout - -# data/local/lm/srilm/srilm.o3g.kn.gz: line 71: warning: non-zero probability for in closed-vocabulary LM -# file data/local/lm/srilm/heldout: 10000 sentences, 78998 words, 0 OOVs -# 0 zeroprobs, logprob= -165170 ppl= 71.7609 ppl1= 123.258 - - -# Note: perplexity SRILM gives to Kaldi-LM model is similar to what kaldi-lm reports above. -# Difference in WSJ must have been due to different treatment of . -ngram -lm $dir/3gram-mincount/lm_unpruned.gz -ppl $sdir/heldout - -# data/local/lm/srilm/srilm.o3g.kn.gz: line 71: warning: non-zero probability for in closed-vocabulary LM -# file data/local/lm/srilm/heldout: 10000 sentences, 78998 words, 0 OOVs -# 0 zeroprobs, logprob= -164990 ppl= 71.4278 ppl1= 122.614 diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/get_1_best.py b/egs/fisher_callhome_spanish/s5_gigaword/local/get_1_best.py deleted file mode 100755 index 9c590635562..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/get_1_best.py +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env python -# Copyright 2014 Gaurav Kumar. 
Apache 2.0 - -# Extracts one best output for a set of files -# The list of files in the conversations for which 1 best output has to be extracted -# words.txt - -import os -import sys - -scoringFile = "exp/sgmm2x_6a_mmi_b0.2/decode_test_it4/scoring/10.tra" -wordsFile = open('exp/sgmm2x_6a/graph/words.txt') -conversationList = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/test') -oneBestTmp = 'exp/sgmm2x_6a_mmi_b0.2/one-best/asr-test' -provFile = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/asr.test', 'w+') -timLocation = '/export/a04/gkumar/corpora/fishcall/fisher/tim' - -def findTranscription(timeDetail): - file1 = open(scoringFile) - for line in file1: - lineComp = line.split() - if lineComp[0] == timeDetail: - return " ".join(lineComp[1:]) - # No result found - return -1 - -words = {} - -# Extract word list -for line in wordsFile: - lineComp = line.split() - words[int(lineComp[1])] = lineComp[0].strip() - -# Now read list of files in conversations -fileList = [] -for line in conversationList: - line = line.strip() - line = line[:-4] - fileList.append(line) - -# Now get timing information to concatenate the ASR outputs -if not os.path.exists(oneBestTmp): - os.makedirs(oneBestTmp) - -for item in fileList: - timingFile = open(timLocation + '/' + item + '.es') - newFile = open(oneBestTmp + '/' + item + '.es', 'w+') - for line in timingFile: - timeInfo = line.split() - mergedTranslation = "" - for timeDetail in timeInfo: - #Locate this in ASR dev/test, this is going to be very slow - tmp = findTranscription(timeDetail) - if tmp != -1: - mergedTranslation = mergedTranslation + " " + tmp - mergedTranslation = mergedTranslation.strip() - transWords = [words[int(x)] for x in mergedTranslation.split()] - newFile.write(" ".join(transWords) + "\n") - provFile.write(" ".join(transWords) + "\n") - - newFile.close() -provFile.close() diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/get_data_weights.pl b/egs/fisher_callhome_spanish/s5_gigaword/local/get_data_weights.pl deleted file mode 100755 index ca5b2a46f8e..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/get_data_weights.pl +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env perl - -# Nagendra Kumar Goel - -# This takes two arguments: -# 1) Pocolm training output folder -# 2) rnnlm weights file name (for output) - -use POSIX; -use List::Util qw[min max]; - -if (@ARGV != 2) { - die "Usage: get_data_weights.pl \n"; -} - -$pdir = shift @ARGV; -$out = shift @ARGV; - -open(P, "<$pdir/metaparameters") || die "Could not open $pdir/metaparameters"; -open(N, "<$pdir/names") || die "Could not open $pdir/names" ; -open(O, ">$out") || die "Could not open $out for writing" ; - -my %scores = (); - -while() { - @n = split(/\s/,$_); - $name = $n[1]; - $w =
<P>
; - @w = split(/\s/,$w); - $weight = $w[1]; - $scores{$name} = $weight; -} - -$min = min(values %scores); - -for(keys %scores) { - $weightout = POSIX::ceil($scores{$_} / $min); - print O "$_\t1\t$weightout\n"; -} diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/get_lattices.py b/egs/fisher_callhome_spanish/s5_gigaword/local/get_lattices.py deleted file mode 100755 index 5430c18bb5b..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/get_lattices.py +++ /dev/null @@ -1,115 +0,0 @@ -#!/usr/bin/env python -# Copyright 2014 Gaurav Kumar. Apache 2.0 - -# Extracts one best output for a set of files -# The list of files in the conversations for which 1 best output has to be extracted -# words.txt - -from __future__ import print_function -import os -import sys -import subprocess - -latticeLocation = 'latjosh-bmmi/lattices-pushed/' - -tmpdir = 'data/local/data/tmp/bmmi-t/lattmp' -invalidplfdir = 'data/local/data/tmp/bmmi-t/invalidplf' -symtable = '/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-matt/data/lang/words.clean.txt' - -conversationList = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/test') -provFile = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/bmmi-t/asr.test.plf', 'w+') -invalidPLF = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/bmmi-t/invalidPLF', 'w+') -blankPLF = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/bmmi-t/blankPLF', 'w+') -rmLines = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/bmmi-t/removeLines', 'w+') - -if not os.path.exists(tmpdir): - os.makedirs(tmpdir) -if not os.path.exists(invalidplfdir): - os.makedirs(invalidplfdir) -else: - os.system("rm " + invalidplfdir + "/*") - -def latticeConcatenate(lat1, lat2): - ''' - Concatenates lattices, writes temporary results to tmpdir - ''' - if lat1 == "": - os.system('rm ' + tmpdir + '/tmp.lat') - return lat2 - else: - proc = subprocess.Popen(['fstconcat', lat1, lat2, (tmpdir + '/tmp.lat')]) - proc.wait() - return tmpdir + '/tmp.lat' - - -def findLattice(timeDetail): - ''' - Finds the lattice corresponding to a time segment - ''' - if os.path.isfile(latticeLocation + timeDetail + '.lat'): - return latticeLocation + timeDetail + '.lat' - else: - return -1 - - -# Now read list of files in conversations -fileList = [] -for line in conversationList: - line = line.strip() - line = line[:-4] - fileList.append(line) - -# IN what order were the conversations added to the spanish files? 
-# Now get timing information to concatenate the ASR outputs - -lineNo = 1 -for item in fileList: - timingFile = open('/export/a04/gkumar/corpora/fishcall/fisher/tim/' + item + '.es') - for line in timingFile: - timeInfo = line.split() - - # For utterances that are concatenated in the translation file, - # the corresponding FSTs have to be translated as well - mergedTranslation = "" - for timeDetail in timeInfo: - tmp = findLattice(timeDetail) - if tmp != -1: - # Concatenate lattices - mergedTranslation = latticeConcatenate(mergedTranslation, tmp) - - print(mergedTranslation) - if mergedTranslation != "": - - # Sanjeev's Recipe : Remove epsilons and topo sort - finalFST = tmpdir + "/final.fst" - os.system("fstrmepsilon " + mergedTranslation + " | fsttopsort - " + finalFST) - - # Now convert to PLF - proc = subprocess.Popen('/export/a04/gkumar/corpora/fishcall/bin/fsm2plf.sh ' + symtable + ' ' + finalFST, stdout=subprocess.PIPE, shell=True) - PLFline = proc.stdout.readline() - finalPLFFile = tmpdir + "/final.plf" - finalPLF = open(finalPLFFile, "w+") - finalPLF.write(PLFline) - finalPLF.close() - - # now check if this is a valid PLF, if not write it's ID in a - # file so it can be checked later - proc = subprocess.Popen("/export/a04/gkumar/moses/mosesdecoder/checkplf < " + finalPLFFile + " 2>&1 | awk 'FNR == 2 {print}'", stdout=subprocess.PIPE, shell=True) - line = proc.stdout.readline() - print("{} {}".format(line, lineNo)) - if line.strip() != "PLF format appears to be correct.": - os.system("cp " + finalFST + " " + invalidplfdir + "/" + timeInfo[0]) - invalidPLF.write(invalidplfdir + "/" + timeInfo[0] + "\n") - rmLines.write("{}\n".format(lineNo)) - else: - provFile.write(PLFline) - else: - blankPLF.write(timeInfo[0] + "\n") - rmLines.write("{}\n".format(lineNo)) - # Now convert to PLF - lineNo += 1 - -provFile.close() -invalidPLF.close() -blankPLF.close() -rmLines.close() diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/get_oracle.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/get_oracle.sh deleted file mode 100755 index 451a7c529fb..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/get_oracle.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env bash - -# Gets lattice oracles -# Copyright 2014 Gaurav Kumar. Apache 2.0 - -if [ $# -lt 3 ]; then - echo "Specify lattice dir, symbol table and text file for partition" - exit 1; -fi - -latticeDir=$1 -textFile=$3 -symTable=$2 -oracleDir=$latticeDir/oracle - -echo $latticeDir -echo $oracleDir - -. ./path.sh - -if [ ! -f $textFile -o ! -f $symTable -o ! -d $latticeDir ]; then - echo "Required files not found" - exit 1; -fi - -mkdir -p $oracleDir - -cat $textFile | sed 's:\[laughter\]::g' | sed 's:\[noise\]::g' | \ - utils/sym2int.pl --map-oov [oov] -f 2- $symTable | \ - $KALDI_ROOT/src/latbin/lattice-oracle --word-symbol-table=$symTable "ark:gunzip -c $latticeDir/lat.*.gz|" ark:- ark,t:$oracleDir/oracle.tra 2>$oracleDir/oracle.log - -sort -k1,1 -u $oracleDir/oracle.tra -o $oracleDir/oracle.tra diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/get_rnnlm_wordlist.py b/egs/fisher_callhome_spanish/s5_gigaword/local/get_rnnlm_wordlist.py deleted file mode 100755 index fc13a7af701..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/get_rnnlm_wordlist.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# -# 2018 Saikiran Valluri, GoVivace inc. 
- -import os, sys - -if len(sys.argv) < 5: - print( "Usage: python get_rnnlm_wordlist.py ") - sys.exit() - -lexicon_words = open(sys.argv[1], 'r', encoding="utf-8") -pocolm_words = open(sys.argv[2], 'r', encoding="utf-8") -rnnlm_wordsout = open(sys.argv[3], 'w', encoding="utf-8") -oov_wordlist = open(sys.argv[4], 'w', encoding="utf-8") - -line_count=0 -lexicon=[] - -for line in lexicon_words: - lexicon.append(line.split()[0]) - rnnlm_wordsout.write(line.split()[0] + " " + str(line_count)+'\n') - line_count = line_count + 1 - -for line in pocolm_words: - if not line.split()[0] in lexicon: - oov_wordlist.write(line.split()[0]+'\n') - rnnlm_wordsout.write(line.split()[0] + " " + str(line_count)+'\n') - line_count = line_count + 1 - -lexicon_words.close() -pocolm_words.close() -rnnlm_wordsout.close() -oov_wordlist.close() diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/get_unigram_weights_vocab.py b/egs/fisher_callhome_spanish/s5_gigaword/local/get_unigram_weights_vocab.py deleted file mode 100644 index 3ecd16772d7..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/get_unigram_weights_vocab.py +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# -# 2018 Saikiran Valluri, GoVivace inc. - -import os, sys - -if len(sys.argv) < 3: - print("Usage : python . ") - print(" Used for generating the unigram weights for second pass vocabulary from the first pass pocolm training metaparameters.") - sys.exit() - -pocolmdir=sys.argv[1] -unigramwts=open(sys.argv[2], 'w') - -names = open(pocolmdir+"/names", 'r') -metaparams = open(pocolmdir+"/metaparameters", 'r') - -name_mapper={} -for line in names: - fields=line.split() - name_mapper[fields[0]] = fields[1] - -lns = metaparams.readlines() -for lineno in range(len(name_mapper.keys())): - line = lns[lineno] - fileid = line.split()[0].split("_")[-1] - weight = line.split()[1] - unigramwts.write(name_mapper[fileid] + " " + weight + "\n") - -names.close() -unigramwts.close() -metaparams.close() diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/isolate_phones.pl b/egs/fisher_callhome_spanish/s5_gigaword/local/isolate_phones.pl deleted file mode 100755 index 0366dcdacb0..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/isolate_phones.pl +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2014 Gaurav Kumar. Apache 2.0 -# Once the phonetic representation for words is generated by the LDC lexicon -# This script converts them into a KALDI compatible format -# In addition, it extends the list of phonemes to consider based on -# orthograhic representations of those words which do not have stressed vowels - -use utf8; - -($tmpdir)=$ARGV[0]; -open(L, "<", "$tmpdir/lexicon_raw") || die "Can't open raw lexicon"; -open(P, "<" , "$tmpdir/phones") || die "Can't open phone file"; -open(I, ">$tmpdir/lexicon_one_column") || die "Can't open text file for writing"; -open(E, ">$tmpdir/phones_extended") || die "Can't open ex-phone file for writing"; -binmode(P, ":utf8"); -binmode(L, ":utf8"); -binmode(I, ":utf8"); -binmode(E, ":utf8"); - -#Get all phones -my %phones = qw(); -while (
<P>
) { - chomp; - $phones{$_} = 1; -} - -print @phones; - -while () { - if (substr($_, 0, 1) eq "#") { - print I $_; - next; - } - $len = length; - $current = 0; - $splitWord = ""; - while ($current < $len) { - #First check for two char codes - $currentChar2 = substr($_, $current, 2); - $currentChar1 = substr($_, $current, 1); - if (exists($phones{$currentChar2})) { - $splitWord = $splitWord . " " . $currentChar2; - $current = $current + 2; - } - else { - # Check if this phone exists - if (!exists($phones{$currentChar1})) { - $phones{$currentChar1} = 1 - } - $splitWord = $splitWord . " " . $currentChar1; - $current = $current + 1; - } - } - $splitWord =~ s/^\s*(.*?)\s*$/$1/; - print I $splitWord, "\n"; -} - -# Now write the phones to the extended phone file -foreach my $key (keys %phones) { - print E $key, "\n"; -} - -close(L); -close(P); -close(I); -close(E); diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/latconvert.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/latconvert.sh deleted file mode 100755 index bbe0af5810c..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/latconvert.sh +++ /dev/null @@ -1,124 +0,0 @@ -#!/usr/bin/env bash -# Author : Gaurav Kumar, Johns Hopkins University -# Creates OpenFST lattices from Kaldi lattices -# This script needs to be run from one level above this directory - -. ./path.sh - -if [ $# -lt 3 ]; then - echo "Enter the latdir (where the lattices will be put), the decode dir containing lattices and the acoustic scale" - exit 1 -fi - -prunebeam=2 - -latdir=$1 -decode_dir=$2 -acoustic_scale=$3 -#latdir="latjosh-2-callhome" -#decode_dir=exp/tri5a/decode_$partition -#acoustic_scale=0.077 - -stage=0 - -if [ -d $decode_dir ] -then - # TODO:Add scaling factor for weights, how? - rawLatDir="lattices" - compiledLatDir="lattices-bin" - preplfLatDir="lattices-pushed" - - mkdir -p $latdir - mkdir -p $latdir/$rawLatDir - mkdir -p $latdir/$compiledLatDir - mkdir -p $latdir/$preplfLatDir - - for l in $decode_dir/lat.*.gz - do - ( - # Extract file name and unzip the file first - bname=${l##*/} - bname="$latdir/${bname%.gz}" - gunzip -c $l > "$bname.bin" - - if [ $stage -le 0 ]; then - - # Now copy into ark format - $KALDI_ROOT/src/latbin/lattice-copy ark:$bname.bin ark,t:- > "$bname.raw" - - # Prune lattices - $KALDI_ROOT/src/latbin/lattice-prune --acoustic-scale=$acoustic_scale --beam=$prunebeam ark:"$bname.raw" ark:"$bname.pruned" - - # Convert to an openfst compatible format - $KALDI_ROOT/src/latbin/lattice-to-fst --lm-scale=1.0 --acoustic-scale=$acoustic_scale ark:$bname.pruned ark,t:$bname.ark.fst - - fi - - if [ $stage -le 1 ]; then - fileName="" - fileLine=0 - - while read line; do - if [ $fileLine = 0 ]; then - fileName="$line" - fileLine=1 - continue - fi - if [ -z "$line" ]; then - fileLine=0 - continue - fi - # Replace laugh, unk, oov, noise with eps - echo "$line" | awk '{if ($3 == 2038 || $3 == 2039 || $3 == 2040) {$3 = 0; $4 = 0} print}' >> "$latdir/$rawLatDir/$fileName.lat" - done < $bname.ark.fst - echo "Done isolating lattices" - fi - ) & - done - wait - rm $latdir/*.bin - rm $latdir/*.pruned - - - if [ $stage -le 2 ]; then - #Compile lattices - for l in $latdir/$rawLatDir/*.lat - do - ( - # Arc type needs to be log - bname=${l##*/} - fstcompile --arc_type=log $latdir/$rawLatDir/$bname $latdir/$compiledLatDir/$bname - ) & - done - wait - echo "Done compiling lattices." 
- fi - - if [ $stage -le 3 ]; then - #Sanjeev's Recipe for creating valid PLF compatible FSTs" - # Create a dummy FST with one state and no arcs first - echo 0 | fstcompile --arc_type=log - $latdir/$preplfLatDir/dummy.fst - # Push Lattice weights towards initial state - for l in $latdir/$compiledLatDir/*.lat - do - ( - bname=${l##*/} - fstrmepsilon $latdir/$compiledLatDir/$bname | \ - fstpush --push_weights --remove_total_weight - | \ - # Do not topo sort here, do it before converting into PLF - # Sanjeev's Recipe : Concatenate with dummy FST - fstconcat - $latdir/$preplfLatDir/dummy.fst | \ - fstreverse - | \ - fstrmepsilon - | \ - fstreverse - $latdir/$preplfLatDir/$bname - ) & - done - wait - # Let's take a moment to thank the dummy FST for playing its - # part in this process. However, it has to go now. - rm $latdir/$preplfLatDir/dummy.fst - echo "Done performing fst push (initial state)" - fi -else - echo "Complete training and decoding first" -fi diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/merge_lexicons.py b/egs/fisher_callhome_spanish/s5_gigaword/local/merge_lexicons.py deleted file mode 100755 index 94546dc44c3..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/merge_lexicons.py +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# 2018 Saikiran Valluri, GoVivace inc., Avaaya - -# Merges unique words from Spanish Fisher, Gigaword and the LDC spanish lexicon -from __future__ import print_function -import sys -import re -import json -import codecs -import operator - -wordlimit = 64000 -tmpdir = sys.argv[1] -ldc_lexicon = sys.argv[2] -uw_fisher = tmpdir + "/uniquewords" -uw_gigaword = tmpdir + "/es_wordlist.json" -uw_LDC = ldc_lexicon + "/callhome_spanish_lexicon_970908/preferences" - -filtered_letters = re.compile(u'[¡¥ª°º¿àçèëìîôö0123456789]') -merged_lexicon = [] -# All three lexicons are in different formats -# First add the data from lexicon_fisher (A) into the dictionary -fisher = codecs.open(uw_fisher, encoding='utf-8') -for line in fisher: - merged_lexicon.append(line.strip()) -fisher.close() - -print("After adding the fisher data, the lexicon contains {} entries".format(len(merged_lexicon))) - -# Now add data from the LDC lexicon -ldc = codecs.open(uw_LDC, encoding='iso-8859-1') -for line in ldc: - entries = line.strip().split('\t') - if entries[0].lower() not in merged_lexicon: - merged_lexicon.append(entries[0].lower()) - -print("After adding the LDC data, the lexicon contains {} entries".format(len(merged_lexicon))) - -# Finally add the gigaword data -gigaword = json.load(open(uw_gigaword)) -gigaword = reversed(sorted(gigaword.items(), key=operator.itemgetter(1))) - -for item in gigaword: - # We need a maximum of wordlimit words in the lexicon - if len(merged_lexicon) == wordlimit: - break - - if item[0].lower() not in merged_lexicon: - merged_lexicon.append(item[0].lower()) - -print("After adding the Gigaword data, the lexicon contains {} entries".format(len(merged_lexicon))) - -# Now write the uniquewords to a file -lf = codecs.open(tmpdir + '/uniquewords64k', encoding='utf-8', mode='w+') -ltuples = sorted(merged_lexicon) - -for item in ltuples: - if not item==u'ñ' and not re.search(filtered_letters, item): - lf.write(item + "\n") - -lf.close() - -print("Finshed writing unique words") diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/monitor_denlats.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/monitor_denlats.sh deleted file mode 100755 index a95893f698a..00000000000 --- 
a/egs/fisher_callhome_spanish/s5_gigaword/local/monitor_denlats.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2014 Gaurav Kumar. Apache 2.0 - -currentJob=0 - -dir=/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-matt/exp/sgmm2x_6a_denlats - -for f in $dir/.done.*; do - d=`echo ${f##*/} | awk 'BEGIN {FS="."} {print $3}'` - if [ $d -gt $currentJob ]; then - currentJob=$d - fi -done - -currentJob=$((currentJob+1)) - -echo Currently processing job : $currentJob - -for i in $(seq 210); do - job[$i]=$i -done - -dir=/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-matt/exp/sgmm2x_6a_denlats/log/$currentJob/q - -for f in $dir/done.*; do - d=`echo ${f##*/} | awk 'BEGIN {FS="."} {print $3}'` - unset job[$d] -done - -echo sub-splits left : ${#job[@]} -echo ${job[@]} diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/nnet3/run_ivector_common.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/nnet3/run_ivector_common.sh deleted file mode 100755 index cc9de4d26c5..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/nnet3/run_ivector_common.sh +++ /dev/null @@ -1,187 +0,0 @@ -#!/bin/bash - -set -e -o pipefail - -# This script is called from scripts like local/nnet3/run_tdnn.sh and -# local/chain/run_tdnn.sh (and may eventually be called by more scripts). It -# contains the common feature preparation and iVector-related parts of the -# script. See those scripts for examples of usage. - - -stage=7 -nj=30 -train_set=train # you might set this to e.g. train. -test_sets="test dev" -gmm=tri5a # This specifies a GMM-dir from the features of the type you're training the system on; - # it should contain alignments for 'train_set'. - -num_threads_ubm=32 -nnet3_affix= # affix for exp/nnet3 directory to put iVector stuff in (e.g. - # in the tedlium recip it's _cleaned). - -. ./cmd.sh -. ./path.sh -. utils/parse_options.sh - - -gmm_dir=exp/${gmm} -ali_dir=exp/${gmm}_ali_${train_set}_sp - -for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do - if [ ! -f $f ]; then - echo "$0: expected file $f to exist" - exit 1 - fi -done - - - -if [ $stage -le 7 ] && [ -f data/${train_set}_sp_hires/feats.scp ]; then - echo "$0: data/${train_set}_sp_hires/feats.scp already exists." - echo " ... Please either remove it, or rerun this script with stage > 7." - exit 1 -fi - - -if [ $stage -le 8 ]; then - echo "$0: preparing directory for speed-perturbed data" - utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp -fi - -if [ $stage -le 9 ]; then - echo "$0: creating high-resolution MFCC features" - - # this shows how you can split across multiple file-systems. we'll split the - # MFCC dir across multiple locations. You might want to be careful here, if you - # have multiple copies of Kaldi checked out and run the same recipe, not to let - # them overwrite each other. - mfccdir=data/${train_set}_sp_hires/data - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then - utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/mfcc/wsj-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage - fi - - for datadir in ${train_set}_sp ${test_sets}; do - utils/copy_data_dir.sh data/$datadir data/${datadir}_hires - done - - # do volume-perturbation on the training data prior to extracting hires - # features; this helps make trained nnets more invariant to test data volume. 
- utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires - - for datadir in ${train_set}_sp ${test_sets}; do - steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \ - --cmd "$train_cmd" data/${datadir}_hires - steps/compute_cmvn_stats.sh data/${datadir}_hires - utils/fix_data_dir.sh data/${datadir}_hires - done -fi - -if [ $stage -le 10 ]; then - echo "$0: computing a subset of data to train the diagonal UBM." - - mkdir -p exp/nnet3${nnet3_affix}/diag_ubm - temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm - - # train a diagonal UBM using a subset of about a quarter of the data - num_utts_total=$(wc -l in the history of a n-gram -# un-comment the following line -#limit_unk_history_opt="--limit-unk-history=true" - -for order in ${ngram_order}; do - # decide on the vocabulary. - # Note: you'd use --wordlist if you had a previously determined word-list - # that you wanted to use. - lm_name="${num_word}_${order}" - min_counts='' - # Note: the following might be a more reasonable setting: - # min_counts='fisher=2 swbd1=1' - if [ -n "${min_counts}" ]; then - lm_name+="_`echo ${min_counts} | tr -s "[:blank:]" "_" | tr "=" "-"`" - fi - unpruned_lm_dir=${lm_dir}/${lm_name}.pocolm - train_lm.py --num-words=${num_word} --num-splits=5 --warm-start-ratio=10 ${max_memory} \ - --min-counts=${min_counts} \ - --keep-int-data=true ${fold_dev_opt} ${bypass_metaparam_optim_opt} \ - ${limit_unk_history_opt} ${textdir} ${order} ${lm_dir}/work ${unpruned_lm_dir} - - if [ $pocolm_stage -eq 2 ];then - mkdir -p ${arpa_dir} - format_arpa_lm.py ${max_memory} ${unpruned_lm_dir} | gzip -c > ${arpa_dir}/${lm_name}_${order}gram_unpruned.arpa.gz - - # example of pruning. note: the threshold can be less than or more than one. - get_data_prob.py ${max_memory} ${textdir}/dev.txt ${unpruned_lm_dir} 2>&1 | grep -F '[perplexity' - for threshold in 1.0 2.0 4.0; do - pruned_lm_dir=${lm_dir}/${lm_name}_prune${threshold}.pocolm - prune_lm_dir.py --final-threshold=${threshold} ${max_memory} ${unpruned_lm_dir} ${pruned_lm_dir} 2>&1 | tail -n 5 | head -n 3 - get_data_prob.py ${max_memory} ${textdir}/dev.txt ${pruned_lm_dir} 2>&1 | grep -F '[perplexity' - - format_arpa_lm.py ${max_memory} ${pruned_lm_dir} | gzip -c > ${arpa_dir}/${lm_name}_${order}gram_prune${threshold}.arpa.gz - - done - - # example of pruning by size. - size=1000000 - pruned_lm_dir=${lm_dir}/${lm_name}_prune${size}.pocolm - prune_lm_dir.py --target-num-ngrams=${size} ${max_memory} ${unpruned_lm_dir} ${pruned_lm_dir} 2>&1 | tail -n 8 | head -n 6 | grep -v 'log-prob changes' - get_data_prob.py ${textdir}/dev.txt ${max_memory} ${pruned_lm_dir} 2>&1 | grep -F '[perplexity' - - format_arpa_lm.py ${max_memory} ${pruned_lm_dir} | gzip -c > ${arpa_dir}/${lm_name}_${order}gram_prune${size}.arpa.gz - fi -done - -# (run local/srilm_baseline.sh ${num_word} to see the following result e.g. local/srilm_baseline.sh 40000 ) - -# the following does does some self-testing, including -# that the computed derivatives are accurate. 
-# local/self_test.sh - -# perplexities from pocolm-estimated language models with pocolm's interpolation -# method from orders 3, 4, and 5 are: -# order 3: optimize_metaparameters.py: final perplexity without barrier function was -4.358818 (perplexity: 78.164689) -# order 4: optimize_metaparameters.py: final perplexity without barrier function was -4.309507 (perplexity: 74.403797) -# order 5: optimize_metaparameters.py: final perplexity without barrier function was -4.301741 (perplexity: 73.828181) - -# note, the perplexities from pocolm-estimated language models with SRILM's -# interpolation from orders 3 and 4 are (from local/pocolm_with_srilm_combination.sh), -# 78.8449 and 75.2202 respectively. - -# note, the perplexities from SRILM-estimated language models with SRILM's -# interpolation tool from orders 3 and 4 are (from local/srilm_baseline.sh), -# 78.9056 and 75.5528 respectively. diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/process_oracle.py b/egs/fisher_callhome_spanish/s5_gigaword/local/process_oracle.py deleted file mode 100755 index 5c68e1204b2..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/process_oracle.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env python -# Copyright 2014 Gaurav Kumar. Apache 2.0 - -# Processes lattice oracles - -import os -import sys - -oracleDir = "exp/tri5a/decode_callhome_train/oracle" -wordsFile = open('exp/sgmm2x_6a/graph/words.txt') -conversationList = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-callhome/train') -oracleTmp = 'exp/tri5a/one-best/oracle-ch-train' -provFile = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-callhome/oracle.train', 'w+') -timLocation = '/export/a04/gkumar/corpora/fishcall/callhome/tim' - -def findTranscription(timeDetail): - file1 = open(oracleDir + "/oracle.tra") - for line in file1: - lineComp = line.split() - if lineComp[0] == timeDetail: - return " ".join(lineComp[1:]) - # No result found - return -1 - -words = {} - -# Extract word list -for line in wordsFile: - lineComp = line.split() - words[int(lineComp[1])] = lineComp[0].strip() - -# Now read list of files in conversations -fileList = [] -for line in conversationList: - line = line.strip() - line = line[:-4] - fileList.append(line) - -# IN what order were the conversations added to the spanish files? 
-# TODO: Make sure they match the order in which these english files are being written - -# Now get timing information to concatenate the ASR outputs -if not os.path.exists(oracleTmp): - os.makedirs(oracleTmp) - -#provFile = open('/export/a04/gkumar/corpora/fishcall/fisher_provisional_dev.es', 'w+') -for item in fileList: - timingFile = open(timLocation + '/' + item + '.es') - newFile = open(oracleTmp + '/' + item + '.es', 'w+') - for line in timingFile: - timeInfo = line.split() - mergedTranslation = "" - for timeDetail in timeInfo: - #Locate this in ASR dev/test, this is going to be very slow - tmp = findTranscription(timeDetail) - if tmp != -1: - mergedTranslation = mergedTranslation + " " + tmp - mergedTranslation = mergedTranslation.strip() - transWords = [words[int(x)] for x in mergedTranslation.split()] - newFile.write(" ".join(transWords) + "\n") - provFile.write(" ".join(transWords) + "\n") - - newFile.close() -provFile.close() diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/rescore.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/rescore.sh deleted file mode 100755 index 1b54b304e50..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/rescore.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2014 Gaurav Kumar. Apache 2.0 - -. ./cmd.sh - -for iter in 1 2 3 4; do - steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri5a/decode_test data/lang data/test exp/sgmm2x_6a/decode_test_fmllr \ - exp/sgmm2x_6a_mmi_b0.2/decode_test_fmllr_it$iter & -done - - -for iter in 1 2 3 4; do - steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri5a/decode_dev data/lang data/dev exp/sgmm2x_6a/decode_dev_fmllr \ - exp/sgmm2x_6a_mmi_b0.2/decode_dev_fmllr_it$iter & -done - - -for iter in 1 2 3 4; do - steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri5a/decode_dev2 data/lang data/dev2 exp/sgmm2x_6a/decode_dev2_fmllr \ - exp/sgmm2x_6a_mmi_b0.2/decode_dev2_fmllr_it$iter & -done diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/rnnlm.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/rnnlm.sh deleted file mode 100755 index 3850910f312..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/rnnlm.sh +++ /dev/null @@ -1,83 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (author: Daniel Povey) -# 2015 Guoguo Chen -# 2017 Hainan Xu -# 2017 Xiaohui Zhang - -# This script trains LMs on the swbd LM-training data. - -# rnnlm/train_rnnlm.sh: best iteration (out of 35) was 34, linking it to final iteration. -# rnnlm/train_rnnlm.sh: train/dev perplexity was 41.9 / 50.0. -# Train objf: -5.07 -4.43 -4.25 -4.17 -4.12 -4.07 -4.04 -4.01 -3.99 -3.98 -3.96 -3.94 -3.92 -3.90 -3.88 -3.87 -3.86 -3.85 -3.84 -3.83 -3.82 -3.81 -3.80 -3.79 -3.78 -3.78 -3.77 -3.77 -3.76 -3.75 -3.74 -3.73 -3.73 -3.72 -3.71 -# Dev objf: -10.32 -4.68 -4.43 -4.31 -4.24 -4.19 -4.15 -4.13 -4.10 -4.09 -4.05 -4.03 -4.02 -4.00 -3.99 -3.98 -3.98 -3.97 -3.96 -3.96 -3.95 -3.94 -3.94 -3.94 -3.93 -3.93 -3.93 -3.92 -3.92 -3.92 -3.92 -3.91 -3.91 -3.91 -3.91 - - -dir=Spanish_gigawrd/rnnlm -pocolm_dir=Spanish_gigawrd/work_pocolm/lm/110000_3.pocolm_pruned -wordslist= -embedding_dim=1024 -lstm_rpd=256 -lstm_nrpd=256 -stage=0 -train_stage=-30 -text_dir=Spanish_gigawrd/text_lm - -. ./cmd.sh -. ./utils/parse_options.sh - -mkdir -p $dir/config -set -e - -for f in $text_dir/dev.txt; do - [ ! 
-f $f ] && \ - echo "$0: expected file $f to exist;" && exit 1 -done - -if [ $stage -le 0 ]; then - if [ -f $text_dir/unigram_weights ] ; then - mv $text_dir/unigram_weights $pocolm_dir/ - fi - cp $wordslist $dir/config/words.txt - n=`cat $dir/config/words.txt | wc -l` - echo " $n" >> $dir/config/words.txt - - # words that are not present in words.txt but are in the training or dev data, will be - # mapped to during training. - echo "" >$dir/config/oov.txt - local/get_data_weights.pl $pocolm_dir $dir/config/data_weights.txt - rnnlm/get_unigram_probs.py --vocab-file=$dir/config/words.txt \ - --unk-word="" \ - --data-weights-file=$dir/config/data_weights.txt \ - $text_dir | awk 'NF==2' >$dir/config/unigram_probs.txt - - # choose features - rnnlm/choose_features.py --unigram-probs=$dir/config/unigram_probs.txt \ - --use-constant-feature=true \ - --special-words=',,,,[noise],[laughter]' \ - $dir/config/words.txt > $dir/config/features.txt -fi - -if [ $stage -le 1 ]; then - cat <$dir/config/xconfig - input dim=$embedding_dim name=input - relu-renorm-layer name=tdnn1 dim=$embedding_dim input=Append(0, IfDefined(-1)) - fast-lstmp-layer name=lstm1 cell-dim=$embedding_dim recurrent-projection-dim=$lstm_rpd non-recurrent-projection-dim=$lstm_nrpd - relu-renorm-layer name=tdnn2 dim=$embedding_dim input=Append(0, IfDefined(-3)) - fast-lstmp-layer name=lstm2 cell-dim=$embedding_dim recurrent-projection-dim=$lstm_rpd non-recurrent-projection-dim=$lstm_nrpd - relu-renorm-layer name=tdnn3 dim=$embedding_dim input=Append(0, IfDefined(-3)) - output-layer name=output include-log-softmax=false dim=$embedding_dim -EOF - rnnlm/validate_config_dir.sh $text_dir $dir/config -fi - -if [ $stage -le 2 ]; then - rnnlm/prepare_rnnlm_dir.sh $text_dir $dir/config $dir -fi - -if [ $stage -le 3 ]; then - rnnlm/train_rnnlm.sh --num-jobs-initial 1 --num-jobs-final 2 \ - --stage $train_stage --num-epochs 5 --cmd "$train_cmd" $dir -fi - -exit 0 diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/run_norm.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/run_norm.sh deleted file mode 100755 index f88fecc815c..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/run_norm.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -punctuation_symbols=( "," "\"" "\`" "\:" "(" ")" "-" ";" "?" "!" 
"/" "_" "{" "}" "*" ) - -config=$1 -path_prefix=$2 -data=$3 -job=$4 -dir=$5 - -substitute_arg="" -num_syms=0 - -for i in "${punctuation_symbols[@]}"; do - symbol=${punctuation_symbols[${num_syms}]} - if [ $num_syms -eq 0 ]; then - substitute_arg="sed 's:${i}: :g'" - else - substitute_arg=$substitute_arg" | sed 's:${i}: :g'" - fi - substitute_arg=$substitute_arg" |sed 's:${i}$: :g' | sed 's:^${i}: :g'" - num_syms=$((num_syms+1)) -done -mkdir -p $dir/normalize/$job -local/clean_abbrevs_text.py $data/$job $data/"$job"_processed -mv $data/"$job"_processed $data/$job -echo "cat $data/$job | $substitute_arg" > $dir/normalize/$job/substitute.sh - -bash $dir/normalize/$job/substitute.sh | \ - sed "s: 's:'s:g" | sed "s: 'm:'m:g" | \ - sed "s: \s*: :g" | tr 'A-ZÂÁÀÄÊÉÈËÏÍÎÖÓÔÖÚÙÛÑÇ' 'a-zâáàäêéèëïíîöóôöúùûñç' > $dir/normalize/$job/text -normalizer_main --config=$config --path_prefix=$path_prefix <$dir/normalize/$job/text >$dir/$job.txt - -exit 0; diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/run_sgmm2x.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/run_sgmm2x.sh deleted file mode 100755 index 9148b1f1171..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/run_sgmm2x.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -# Copyright 2014 Gaurav Kumar. Apache 2.0 - -# This is as run_sgmm2.sh but excluding the "speaker-dependent weights", -# so not doing the symmetric SGMM. - -. ./cmd.sh - -## SGMM on top of LDA+MLLT+SAT features. -if [ ! -f exp/ubm6a/final.mdl ]; then - steps/train_ubm.sh --silence-weight 0.5 --cmd "$train_cmd" 800 data/train data/lang exp/tri5a_ali exp/ubm6a || exit 1; -fi -# Double the number of SAT states : sanjeev -steps/train_sgmm2.sh --spk-dep-weights false --cmd "$train_cmd" 10000 120000 \ - data/train data/lang exp/tri5a_ali exp/ubm6a/final.ubm exp/sgmm2x_6a || exit 1; - -utils/mkgraph.sh data/lang_test exp/sgmm2x_6a exp/sgmm2x_6a/graph || exit 1; - -steps/decode_sgmm2.sh --config conf/decode.config --nj 25 --cmd "$decode_cmd" \ - --transform-dir exp/tri5a/decode_dev exp/sgmm2x_6a/graph data/dev exp/sgmm2x_6a/decode_dev || exit 1; - -steps/decode_sgmm2.sh --use-fmllr true --config conf/decode.config --nj 25 --cmd "$decode_cmd" \ - --transform-dir exp/tri5a/decode_dev exp/sgmm2x_6a/graph data/dev exp/sgmm2x_6a/decode_dev_fmllr || exit 1; - -steps/decode_sgmm2.sh --config conf/decode.config --nj 25 --cmd "$decode_cmd" \ - --transform-dir exp/tri5a/decode_test exp/sgmm2x_6a/graph data/test exp/sgmm2x_6a/decode_test || exit 1; - -steps/decode_sgmm2.sh --use-fmllr true --config conf/decode.config --nj 25 --cmd "$decode_cmd" \ - --transform-dir exp/tri5a/decode_test exp/sgmm2x_6a/graph data/test exp/sgmm2x_6a/decode_test_fmllr || exit 1; - -steps/decode_sgmm2.sh --config conf/decode.config --nj 25 --cmd "$decode_cmd" \ - --transform-dir exp/tri5a/decode_dev2 exp/sgmm2x_6a/graph data/dev2 exp/sgmm2x_6a/decode_dev2 || exit 1; - -steps/decode_sgmm2.sh --use-fmllr true --config conf/decode.config --nj 25 --cmd "$decode_cmd" \ - --transform-dir exp/tri5a/decode_dev2 exp/sgmm2x_6a/graph data/dev2 exp/sgmm2x_6a/decode_dev2_fmllr || exit 1; - - # Now we'll align the SGMM system to prepare for discriminative training. 
- steps/align_sgmm2.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri5a \ - --use-graphs true --use-gselect true data/train data/lang exp/sgmm2x_6a exp/sgmm2x_6a_ali || exit 1; - steps/make_denlats_sgmm2.sh --nj 30 --sub-split 210 --cmd "$decode_cmd" --transform-dir exp/tri5a \ - data/train data/lang exp/sgmm2x_6a_ali exp/sgmm2x_6a_denlats - steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" --transform-dir exp/tri5a --boost 0.2 \ - data/train data/lang exp/sgmm2x_6a_ali exp/sgmm2x_6a_denlats exp/sgmm2x_6a_mmi_b0.2 - - for iter in 1 2 3 4; do - steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri5a/decode_test data/lang data/test exp/sgmm2x_6a/decode_test exp/sgmm2x_6a_mmi_b0.2/decode_test_it$iter & - done - -wait -steps/decode_combine.sh data/test data/lang exp/tri1/decode exp/tri2a/decode exp/combine_1_2a/decode || exit 1; -steps/decode_combine.sh data/test data/lang exp/sgmm2x_4a/decode exp/tri3b_mmi/decode exp/combine_sgmm2x_4a_3b/decode || exit 1; -# combining the sgmm run and the best MMI+fMMI run. -steps/decode_combine.sh data/test data/lang exp/sgmm2x_4a/decode exp/tri3b_fmmi_c/decode_it5 exp/combine_sgmm2x_4a_3b_fmmic5/decode || exit 1; - -steps/decode_combine.sh data/test data/lang exp/sgmm2x_4a_mmi_b0.2/decode_it4 exp/tri3b_fmmi_c/decode_it5 exp/combine_sgmm2x_4a_mmi_3b_fmmic5/decode || exit 1; - diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/score.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/score.sh deleted file mode 120000 index 0afefc3158c..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/score.sh +++ /dev/null @@ -1 +0,0 @@ -../steps/score_kaldi.sh \ No newline at end of file diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/score_oracle.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/score_oracle.sh deleted file mode 100755 index 21b793a4d27..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/score_oracle.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2014 Gaurav Kumar. Apache 2.0 - -oracle_dir=exp/tri5a/decode_callhome_test/oracle -split=callhome_test -data_dir=data/callhome_test -lang_dir=data/lang - -# Make sure that your STM and CTM files are in UTF-8 encoding -# Any other encoding will cause this script to fail/misbehave - -if [ ! -e $oracle_dir -o ! -e $data_dir -o ! -e $lang_dir ]; then - echo "Missing pre-requisites" - exit 1 -fi - -for i in {5..20}; do - mkdir -p $oracle_dir/score_$i - cp $oracle_dir/$split.ctm $oracle_dir/score_$i/ -done - -. 
/export/babel/data/software/env.sh - -# Start scoring -/export/a11/guoguo/babel/103-bengali-limitedLP.official/local/score_stm.sh $data_dir $lang_dir \ - $oracle_dir - -# Print a summary of the result -grep "Percent Total Error" $oracle_dir/score_*/$split.ctm.dtl diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/dev b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/dev deleted file mode 100644 index 77e3b01786f..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/dev +++ /dev/null @@ -1,20 +0,0 @@ -sp_0897.sph -sp_0968.sph -sp_0981.sph -sp_1062.sph -sp_1292.sph -sp_1411.sph -sp_1413.sph -sp_1552.sph -sp_1554.sph -sp_1805.sph -sp_1808.sph -sp_1882.sph -sp_1930.sph -sp_1947.sph -sp_2037.sph -sp_2054.sph -sp_2057.sph -sp_2107.sph -sp_2109.sph -sp_2144.sph diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/dev b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/dev deleted file mode 100644 index 77e3b01786f..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/dev +++ /dev/null @@ -1,20 +0,0 @@ -sp_0897.sph -sp_0968.sph -sp_0981.sph -sp_1062.sph -sp_1292.sph -sp_1411.sph -sp_1413.sph -sp_1552.sph -sp_1554.sph -sp_1805.sph -sp_1808.sph -sp_1882.sph -sp_1930.sph -sp_1947.sph -sp_2037.sph -sp_2054.sph -sp_2057.sph -sp_2107.sph -sp_2109.sph -sp_2144.sph diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/test b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/test deleted file mode 100644 index 0cbc3cc95fd..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/test +++ /dev/null @@ -1,20 +0,0 @@ -sp_0053.sph -sp_0082.sph -sp_0084.sph -sp_0088.sph -sp_0681.sph -sp_0699.sph -sp_0776.sph -sp_0857.sph -sp_1031.sph -sp_1100.sph -sp_1148.sph -sp_1156.sph -sp_1186.sph -sp_1212.sph -sp_1345.sph -sp_1435.sph -sp_1578.sph -sp_1648.sph -sp_1807.sph -sp_1847.sph diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/train b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/train deleted file mode 100644 index 2c936072534..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_callhome/train +++ /dev/null @@ -1,80 +0,0 @@ -sp_0085.sph -sp_0096.sph -sp_0098.sph -sp_0100.sph -sp_0291.sph -sp_0713.sph -sp_0724.sph -sp_0726.sph -sp_0731.sph -sp_0733.sph -sp_0753.sph -sp_0788.sph -sp_0826.sph -sp_0831.sph -sp_0836.sph -sp_0841.sph -sp_0850.sph -sp_0855.sph -sp_0892.sph -sp_0899.sph -sp_0910.sph -sp_0917.sph -sp_0919.sph -sp_0923.sph -sp_0945.sph -sp_0950.sph -sp_0951.sph -sp_0992.sph -sp_0997.sph -sp_1013.sph -sp_1039.sph -sp_1044.sph -sp_1045.sph -sp_1058.sph -sp_1060.sph -sp_1063.sph -sp_1081.sph -sp_1106.sph -sp_1122.sph -sp_1140.sph -sp_1175.sph -sp_1195.sph -sp_1198.sph -sp_1231.sph -sp_1234.sph -sp_1255.sph -sp_1260.sph -sp_1261.sph -sp_1262.sph -sp_1264.sph -sp_1266.sph -sp_1273.sph -sp_1275.sph -sp_1284.sph -sp_1286.sph -sp_1304.sph -sp_1308.sph -sp_1333.sph -sp_1341.sph -sp_1353.sph -sp_1368.sph -sp_1379.sph -sp_1384.sph -sp_1449.sph -sp_1463.sph -sp_1574.sph -sp_1740.sph -sp_1759.sph -sp_1849.sph -sp_1908.sph -sp_1915.sph -sp_1918.sph -sp_1974.sph -sp_1976.sph -sp_1988.sph -sp_2000.sph -sp_2056.sph -sp_2070.sph -sp_2091.sph -sp_2101.sph diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/dev b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/dev deleted file mode 100644 index d3769f0ffb5..00000000000 
--- a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/dev +++ /dev/null @@ -1,20 +0,0 @@ -20051009_182032_217_fsp.sph -20051009_210519_219_fsp.sph -20051010_212418_225_fsp.sph -20051016_180547_265_fsp.sph -20051016_210626_267_fsp.sph -20051017_180712_270_fsp.sph -20051017_220530_275_fsp.sph -20051017_234550_276_fsp.sph -20051018_210220_279_fsp.sph -20051018_210744_280_fsp.sph -20051019_190221_288_fsp.sph -20051019_210146_289_fsp.sph -20051019_230329_292_fsp.sph -20051022_180817_311_fsp.sph -20051023_232057_325_fsp.sph -20051024_180453_327_fsp.sph -20051024_181110_329_fsp.sph -20051025_212334_337_fsp.sph -20051026_180724_341_fsp.sph -20051026_211309_346_fsp.sph diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/dev2 b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/dev2 deleted file mode 100644 index f1b5c293d67..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/dev2 +++ /dev/null @@ -1,20 +0,0 @@ -20050909_210655_26_fsp.sph -20050910_210708_33_fsp.sph -20050913_210933_49_fsp.sph -20050913_211649_50_fsp.sph -20050915_210434_65_fsp.sph -20050916_180332_68_fsp.sph -20050918_180733_81_fsp.sph -20050918_210841_82_fsp.sph -20050920_212030_93_fsp.sph -20050921_210443_99_fsp.sph -20050923_211304_115_fsp.sph -20050925_180713_120_fsp.sph -20050925_180825_121_fsp.sph -20050926_180516_125_fsp.sph -20050926_180555_126_fsp.sph -20050928_000254_141_fsp.sph -20050930_210540_161_fsp.sph -20051002_180726_170_fsp.sph -20051007_181850_205_fsp.sph -20051007_191217_206_fsp.sph diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/test b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/test deleted file mode 100644 index 6190ced077c..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/test +++ /dev/null @@ -1,20 +0,0 @@ -20051028_180633_356_fsp.sph -20051029_211606_365_fsp.sph -20051030_193924_371_fsp.sph -20051101_212731_386_fsp.sph -20051102_134901_389_fsp.sph -20051102_180402_391_fsp.sph -20051102_181501_393_fsp.sph -20051103_211105_404_fsp.sph -20051103_233456_406_fsp.sph -20051107_184634_438_fsp.sph -20051109_180253_445_fsp.sph -20051109_210353_450_fsp.sph -20051111_181045_470_fsp.sph -20051111_182216_472_fsp.sph -20051112_181649_485_fsp.sph -20051113_155059_492_fsp.sph -20051113_210221_496_fsp.sph -20051113_214925_498_fsp.sph -20051114_181749_505_fsp.sph -20051115_212123_516_fsp.sph diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/train b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/train deleted file mode 100644 index b57683842b2..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/split_fisher/train +++ /dev/null @@ -1,759 +0,0 @@ -20050908_182943_22_fsp.sph -20050908_191808_23_fsp.sph -20050909_210428_25_fsp.sph -20050909_221657_28_fsp.sph -20050910_180310_29_fsp.sph -20050910_180330_30_fsp.sph -20050910_181354_31_fsp.sph -20050910_190223_32_fsp.sph -20050911_180647_34_fsp.sph -20050911_200216_35_fsp.sph -20050911_210429_36_fsp.sph -20050911_210530_37_fsp.sph -20050911_210904_38_fsp.sph -20050912_181441_40_fsp.sph -20050912_181538_41_fsp.sph -20050912_182044_42_fsp.sph -20050912_212913_43_fsp.sph -20050913_180324_44_fsp.sph -20050913_180731_46_fsp.sph -20050913_180947_47_fsp.sph -20050913_210409_48_fsp.sph -20050914_000831_51_fsp.sph -20050914_180332_52_fsp.sph -20050914_180606_53_fsp.sph -20050914_181020_54_fsp.sph -20050914_210243_55_fsp.sph 
-20050914_210822_56_fsp.sph -20050914_220753_58_fsp.sph -20050915_180728_60_fsp.sph -20050915_180740_61_fsp.sph -20050915_192457_62_fsp.sph -20050915_194045_63_fsp.sph -20050915_210200_64_fsp.sph -20050915_210916_66_fsp.sph -20050915_212325_67_fsp.sph -20050916_180740_69_fsp.sph -20050916_200334_70_fsp.sph -20050916_210235_71_fsp.sph -20050916_210510_72_fsp.sph -20050916_223656_73_fsp.sph -20050917_210406_74_fsp.sph -20050917_210805_75_fsp.sph -20050917_211045_76_fsp.sph -20050917_212041_77_fsp.sph -20050918_180326_80_fsp.sph -20050919_000612_83_fsp.sph -20050919_180511_84_fsp.sph -20050919_180703_85_fsp.sph -20050919_180925_86_fsp.sph -20050919_190254_87_fsp.sph -20050920_180330_88_fsp.sph -20050920_180342_89_fsp.sph -20050920_180607_90_fsp.sph -20050920_181919_91_fsp.sph -20050920_211414_92_fsp.sph -20050920_230520_94_fsp.sph -20050921_180639_95_fsp.sph -20050921_181002_96_fsp.sph -20050921_210340_98_fsp.sph -20050921_211329_101_fsp.sph -20050921_221625_102_fsp.sph -20050922_180618_103_fsp.sph -20050922_180948_104_fsp.sph -20050922_210740_106_fsp.sph -20050922_211003_107_fsp.sph -20050922_230412_108_fsp.sph -20050923_180514_110_fsp.sph -20050923_180530_111_fsp.sph -20050923_210442_114_fsp.sph -20050924_180747_117_fsp.sph -20050924_181124_118_fsp.sph -20050925_210645_122_fsp.sph -20050925_231407_123_fsp.sph -20050926_000425_124_fsp.sph -20050926_180719_127_fsp.sph -20050926_220244_130_fsp.sph -20050926_230706_131_fsp.sph -20050927_180422_132_fsp.sph -20050927_181033_133_fsp.sph -20050927_181232_134_fsp.sph -20050927_210320_135_fsp.sph -20050927_210848_136_fsp.sph -20050927_210947_138_fsp.sph -20050927_211929_139_fsp.sph -20050927_231016_140_fsp.sph -20050928_180631_142_fsp.sph -20050928_210256_144_fsp.sph -20050928_210700_145_fsp.sph -20050928_211113_146_fsp.sph -20050928_220320_147_fsp.sph -20050928_232236_148_fsp.sph -20050929_180318_149_fsp.sph -20050929_180722_150_fsp.sph -20050929_180932_151_fsp.sph -20050929_211337_153_fsp.sph -20050929_220820_154_fsp.sph -20050929_230406_155_fsp.sph -20050930_180329_156_fsp.sph -20050930_180411_157_fsp.sph -20050930_180646_158_fsp.sph -20050930_200308_159_fsp.sph -20051001_180328_163_fsp.sph -20051001_181004_164_fsp.sph -20051001_210749_166_fsp.sph -20051001_211346_167_fsp.sph -20051002_180339_169_fsp.sph -20051002_210324_171_fsp.sph -20051002_220651_174_fsp.sph -20051003_180434_175_fsp.sph -20051003_211042_178_fsp.sph -20051003_220633_179_fsp.sph -20051004_180351_180_fsp.sph -20051004_180542_181_fsp.sph -20051004_180730_182_fsp.sph -20051004_200737_183_fsp.sph -20051004_211611_185_fsp.sph -20051005_180420_187_fsp.sph -20051005_180709_188_fsp.sph -20051005_213606_191_fsp.sph -20051005_220917_192_fsp.sph -20051005_230659_193_fsp.sph -20051006_180416_194_fsp.sph -20051006_180653_195_fsp.sph -20051006_180815_196_fsp.sph -20051006_181525_197_fsp.sph -20051006_183153_199_fsp.sph -20051006_210246_200_fsp.sph -20051006_210417_201_fsp.sph -20051006_220329_203_fsp.sph -20051008_000036_208_fsp.sph -20051008_180249_209_fsp.sph -20051008_181720_210_fsp.sph -20051008_183224_211_fsp.sph -20051008_190256_212_fsp.sph -20051008_211712_214_fsp.sph -20051008_213416_215_fsp.sph -20051009_180444_216_fsp.sph -20051009_190753_218_fsp.sph -20051009_220443_221_fsp.sph -20051010_180650_222_fsp.sph -20051010_182706_223_fsp.sph -20051010_210622_224_fsp.sph -20051010_222853_227_fsp.sph -20051010_231630_228_fsp.sph -20051011_181919_230_fsp.sph -20051011_211026_232_fsp.sph -20051011_220348_233_fsp.sph -20051012_180233_234_fsp.sph -20051012_190241_236_fsp.sph 
-20051012_193952_237_fsp.sph -20051012_224157_239_fsp.sph -20051013_180458_240_fsp.sph -20051013_180613_241_fsp.sph -20051013_180700_242_fsp.sph -20051013_182213_244_fsp.sph -20051013_210221_245_fsp.sph -20051013_210425_246_fsp.sph -20051013_210941_247_fsp.sph -20051013_220243_248_fsp.sph -20051014_180259_249_fsp.sph -20051014_180940_250_fsp.sph -20051014_180948_251_fsp.sph -20051014_183707_252_fsp.sph -20051014_210348_253_fsp.sph -20051014_210647_254_fsp.sph -20051014_220227_256_fsp.sph -20051014_230339_257_fsp.sph -20051015_180549_258_fsp.sph -20051015_190247_259_fsp.sph -20051015_210138_260_fsp.sph -20051015_210701_261_fsp.sph -20051015_210831_262_fsp.sph -20051016_180926_266_fsp.sph -20051017_000346_269_fsp.sph -20051017_210137_273_fsp.sph -20051017_215732_274_fsp.sph -20051018_180559_277_fsp.sph -20051018_180816_278_fsp.sph -20051018_211701_282_fsp.sph -20051018_231046_283_fsp.sph -20051018_235317_284_fsp.sph -20051019_180448_285_fsp.sph -20051019_183344_287_fsp.sph -20051020_180339_293_fsp.sph -20051020_180759_295_fsp.sph -20051020_210218_297_fsp.sph -20051020_212525_299_fsp.sph -20051020_222944_300_fsp.sph -20051020_234953_301_fsp.sph -20051021_180218_302_fsp.sph -20051021_180508_303_fsp.sph -20051021_190605_304_fsp.sph -20051021_210159_305_fsp.sph -20051021_210530_306_fsp.sph -20051021_222225_307_fsp.sph -20051022_001311_309_fsp.sph -20051022_180452_310_fsp.sph -20051022_180829_312_fsp.sph -20051022_190406_313_fsp.sph -20051022_200517_314_fsp.sph -20051022_210920_315_fsp.sph -20051022_230324_316_fsp.sph -20051022_232428_317_fsp.sph -20051023_180342_318_fsp.sph -20051023_180530_319_fsp.sph -20051023_190301_321_fsp.sph -20051023_210258_322_fsp.sph -20051023_210605_323_fsp.sph -20051023_223751_324_fsp.sph -20051024_000348_326_fsp.sph -20051024_180624_328_fsp.sph -20051024_210748_330_fsp.sph -20051024_211346_331_fsp.sph -20051024_221753_332_fsp.sph -20051024_230857_333_fsp.sph -20051025_180351_334_fsp.sph -20051025_210532_335_fsp.sph -20051025_210959_336_fsp.sph -20051025_220419_338_fsp.sph -20051026_180611_340_fsp.sph -20051026_190359_343_fsp.sph -20051026_210334_344_fsp.sph -20051026_211202_345_fsp.sph -20051026_230956_347_fsp.sph -20051026_234001_348_fsp.sph -20051027_180217_349_fsp.sph -20051027_210159_351_fsp.sph -20051027_210333_352_fsp.sph -20051027_211525_353_fsp.sph -20051027_231329_354_fsp.sph -20051028_180329_355_fsp.sph -20051028_210350_358_fsp.sph -20051028_211904_359_fsp.sph -20051029_200218_363_fsp.sph -20051029_210442_364_fsp.sph -20051029_220538_366_fsp.sph -20051030_000333_367_fsp.sph -20051030_180521_368_fsp.sph -20051030_181001_369_fsp.sph -20051030_190231_370_fsp.sph -20051030_210903_372_fsp.sph -20051030_230444_373_fsp.sph -20051031_180213_374_fsp.sph -20051031_180906_375_fsp.sph -20051031_210229_377_fsp.sph -20051031_220447_379_fsp.sph -20051101_153940_380_fsp.sph -20051101_211314_384_fsp.sph -20051101_223911_387_fsp.sph -20051101_230216_388_fsp.sph -20051102_175957_390_fsp.sph -20051102_210243_394_fsp.sph -20051102_210828_395_fsp.sph -20051102_211130_396_fsp.sph -20051103_163507_398_fsp.sph -20051103_180920_400_fsp.sph -20051103_185102_401_fsp.sph -20051103_210539_403_fsp.sph -20051103_223906_405_fsp.sph -20051104_123901_407_fsp.sph -20051104_180145_408_fsp.sph -20051104_181437_409_fsp.sph -20051104_190247_410_fsp.sph -20051104_210307_411_fsp.sph -20051104_210814_412_fsp.sph -20051104_212121_413_fsp.sph -20051104_222117_414_fsp.sph -20051104_231424_416_fsp.sph -20051105_175657_418_fsp.sph -20051105_181203_419_fsp.sph -20051105_210724_421_fsp.sph 
-20051105_220745_422_fsp.sph -20051106_180232_424_fsp.sph -20051106_181321_425_fsp.sph -20051106_190219_426_fsp.sph -20051106_200213_427_fsp.sph -20051106_210215_428_fsp.sph -20051106_210310_429_fsp.sph -20051106_211252_430_fsp.sph -20051106_211804_431_fsp.sph -20051106_215339_432_fsp.sph -20051106_221653_433_fsp.sph -20051107_115855_434_fsp.sph -20051107_160351_435_fsp.sph -20051107_180332_436_fsp.sph -20051107_182401_437_fsp.sph -20051107_210309_439_fsp.sph -20051107_212723_440_fsp.sph -20051108_145902_441_fsp.sph -20051108_181424_442_fsp.sph -20051108_210224_443_fsp.sph -20051108_212018_444_fsp.sph -20051109_180413_446_fsp.sph -20051109_181432_447_fsp.sph -20051109_181906_448_fsp.sph -20051109_183631_449_fsp.sph -20051109_210436_451_fsp.sph -20051109_211151_452_fsp.sph -20051109_212148_453_fsp.sph -20051109_232505_454_fsp.sph -20051110_155523_455_fsp.sph -20051110_180208_456_fsp.sph -20051110_180838_457_fsp.sph -20051110_182221_459_fsp.sph -20051110_182318_460_fsp.sph -20051110_210200_461_fsp.sph -20051110_210233_462_fsp.sph -20051110_210454_463_fsp.sph -20051110_211110_464_fsp.sph -20051110_212818_466_fsp.sph -20051110_225245_467_fsp.sph -20051111_181441_471_fsp.sph -20051111_184451_474_fsp.sph -20051111_190326_475_fsp.sph -20051111_194004_477_fsp.sph -20051111_201357_478_fsp.sph -20051111_230329_480_fsp.sph -20051112_000305_482_fsp.sph -20051112_165916_483_fsp.sph -20051112_185651_487_fsp.sph -20051112_190443_488_fsp.sph -20051112_210205_489_fsp.sph -20051112_210631_490_fsp.sph -20051112_231502_491_fsp.sph -20051113_180809_493_fsp.sph -20051113_210908_497_fsp.sph -20051113_220433_499_fsp.sph -20051114_171942_502_fsp.sph -20051114_181118_504_fsp.sph -20051114_210412_506_fsp.sph -20051114_212032_507_fsp.sph -20051114_215057_508_fsp.sph -20051114_220412_509_fsp.sph -20051114_225557_510_fsp.sph -20051115_134012_511_fsp.sph -20051115_180301_512_fsp.sph -20051115_181412_513_fsp.sph -20051115_181731_514_fsp.sph -20051115_182149_515_fsp.sph -20051115_213551_517_fsp.sph -20051115_215935_518_fsp.sph -20051115_230749_520_fsp.sph -20051116_000221_521_fsp.sph -20051116_172353_522_fsp.sph -20051116_180237_524_fsp.sph -20051116_181228_525_fsp.sph -20051116_181816_526_fsp.sph -20051116_190450_527_fsp.sph -20051116_210146_528_fsp.sph -20051116_210553_529_fsp.sph -20051116_211222_530_fsp.sph -20051116_212312_531_fsp.sph -20051116_222454_532_fsp.sph -20051116_233038_533_fsp.sph -20051117_001013_534_fsp.sph -20051117_180234_535_fsp.sph -20051117_181844_537_fsp.sph -20051117_210156_538_fsp.sph -20051117_210403_539_fsp.sph -20051117_211540_540_fsp.sph -20051117_211833_541_fsp.sph -20051117_212855_542_fsp.sph -20051117_213407_543_fsp.sph -20051117_220412_544_fsp.sph -20051117_225943_545_fsp.sph -20051118_180619_547_fsp.sph -20051118_180739_548_fsp.sph -20051118_182114_549_fsp.sph -20051118_182652_550_fsp.sph -20051118_210212_551_fsp.sph -20051118_210455_552_fsp.sph -20051118_212058_553_fsp.sph -20051118_212829_554_fsp.sph -20051119_000355_555_fsp.sph -20051119_181105_556_fsp.sph -20051119_210802_557_fsp.sph -20051119_212315_559_fsp.sph -20051119_214926_560_fsp.sph -20051120_181008_561_fsp.sph -20051120_181339_562_fsp.sph -20051120_190412_563_fsp.sph -20051120_205645_565_fsp.sph -20051120_210347_566_fsp.sph -20051120_211526_567_fsp.sph -20051121_181138_569_fsp.sph -20051121_181357_570_fsp.sph -20051121_190155_571_fsp.sph -20051121_210922_573_fsp.sph -20051122_181114_574_fsp.sph -20051122_190326_576_fsp.sph -20051122_210253_577_fsp.sph -20051122_210703_578_fsp.sph -20051122_211805_579_fsp.sph 
-20051122_213037_580_fsp.sph -20051122_215430_581_fsp.sph -20051123_180926_582_fsp.sph -20051123_181644_583_fsp.sph -20051123_210214_584_fsp.sph -20051123_211514_585_fsp.sph -20051123_212412_586_fsp.sph -20051123_213259_587_fsp.sph -20051124_181720_588_fsp.sph -20051124_190336_589_fsp.sph -20051124_212221_591_fsp.sph -20051124_220457_592_fsp.sph -20051125_181632_593_fsp.sph -20051125_190327_594_fsp.sph -20051125_212150_595_fsp.sph -20051126_181804_597_fsp.sph -20051126_190347_598_fsp.sph -20051126_210222_599_fsp.sph -20051127_181335_601_fsp.sph -20051127_190405_602_fsp.sph -20051127_210516_603_fsp.sph -20051127_211200_604_fsp.sph -20051127_212516_605_fsp.sph -20051128_215149_608_fsp.sph -20051128_222007_609_fsp.sph -20051129_180204_610_fsp.sph -20051129_181241_612_fsp.sph -20051129_181547_613_fsp.sph -20051129_183449_614_fsp.sph -20051129_190152_615_fsp.sph -20051129_210218_616_fsp.sph -20051129_210342_617_fsp.sph -20051129_212711_618_fsp.sph -20051130_181543_619_fsp.sph -20051130_182626_620_fsp.sph -20051130_210202_622_fsp.sph -20051130_210910_623_fsp.sph -20051130_212724_626_fsp.sph -20051130_220121_627_fsp.sph -20051130_221538_628_fsp.sph -20051201_181034_630_fsp.sph -20051201_181303_631_fsp.sph -20051201_183429_632_fsp.sph -20051201_191426_633_fsp.sph -20051201_193415_634_fsp.sph -20051201_195005_635_fsp.sph -20051201_210713_636_fsp.sph -20051201_212329_637_fsp.sph -20051201_230640_638_fsp.sph -20051202_181119_639_fsp.sph -20051202_181659_640_fsp.sph -20051202_182058_641_fsp.sph -20051202_184713_642_fsp.sph -20051202_190154_643_fsp.sph -20051202_193515_644_fsp.sph -20051202_210252_645_fsp.sph -20051202_211824_646_fsp.sph -20051202_212105_647_fsp.sph -20051203_180701_649_fsp.sph -20051203_182100_650_fsp.sph -20051203_182132_651_fsp.sph -20051203_182418_652_fsp.sph -20051203_183501_653_fsp.sph -20051203_190503_654_fsp.sph -20051203_191125_655_fsp.sph -20051203_210216_656_fsp.sph -20051203_212114_658_fsp.sph -20051203_222533_661_fsp.sph -20051206_180753_662_fsp.sph -20051206_180911_663_fsp.sph -20051206_181649_664_fsp.sph -20051206_183057_665_fsp.sph -20051206_193937_667_fsp.sph -20051206_201757_668_fsp.sph -20051206_203158_669_fsp.sph -20051206_210127_670_fsp.sph -20051206_210744_671_fsp.sph -20051206_211522_672_fsp.sph -20051206_213252_673_fsp.sph -20051206_214122_674_fsp.sph -20051206_231328_675_fsp.sph -20051207_180507_676_fsp.sph -20051207_181020_677_fsp.sph -20051207_190155_678_fsp.sph -20051207_190426_679_fsp.sph -20051207_193103_681_fsp.sph -20051207_211858_683_fsp.sph -20051207_212300_684_fsp.sph -20051207_212831_685_fsp.sph -20051207_214411_686_fsp.sph -20051208_180208_687_fsp.sph -20051208_180810_688_fsp.sph -20051208_182430_689_fsp.sph -20051208_190333_690_fsp.sph -20051208_210609_691_fsp.sph -20051208_211702_692_fsp.sph -20051208_212444_694_fsp.sph -20051208_214100_696_fsp.sph -20051208_220606_697_fsp.sph -20051209_180824_699_fsp.sph -20051209_181542_700_fsp.sph -20051209_181642_701_fsp.sph -20051209_182541_702_fsp.sph -20051209_182858_703_fsp.sph -20051209_210136_704_fsp.sph -20051209_210452_705_fsp.sph -20051209_211542_706_fsp.sph -20051209_212515_707_fsp.sph -20051209_222427_709_fsp.sph -20051209_231702_710_fsp.sph -20051210_180659_711_fsp.sph -20051210_181201_712_fsp.sph -20051210_182013_713_fsp.sph -20051210_182603_714_fsp.sph -20051210_190201_715_fsp.sph -20051210_210535_717_fsp.sph -20051210_210735_718_fsp.sph -20051211_000414_719_fsp.sph -20051211_181346_720_fsp.sph -20051211_182045_721_fsp.sph -20051211_184252_723_fsp.sph -20051211_190523_724_fsp.sph 
-20051211_210240_725_fsp.sph -20051211_211415_726_fsp.sph -20051212_180251_727_fsp.sph -20051212_181817_728_fsp.sph -20051212_182453_729_fsp.sph -20051212_190335_730_fsp.sph -20051212_210527_731_fsp.sph -20051212_210738_732_fsp.sph -20051212_211419_733_fsp.sph -20051212_213447_734_fsp.sph -20051212_214512_735_fsp.sph -20051213_180254_736_fsp.sph -20051213_185913_737_fsp.sph -20051213_191741_738_fsp.sph -20051213_210120_739_fsp.sph -20051213_211552_741_fsp.sph -20051213_211953_742_fsp.sph -20051213_221424_743_fsp.sph -20051213_222016_744_fsp.sph -20051214_193942_746_fsp.sph -20051214_194606_747_fsp.sph -20051214_201000_748_fsp.sph -20051214_202717_749_fsp.sph -20051214_211653_750_fsp.sph -20051214_212318_751_fsp.sph -20051214_212718_752_fsp.sph -20051214_213225_753_fsp.sph -20051215_180855_754_fsp.sph -20051215_181731_755_fsp.sph -20051215_182213_756_fsp.sph -20051215_190143_757_fsp.sph -20051215_190419_758_fsp.sph -20051215_195526_759_fsp.sph -20051215_200925_760_fsp.sph -20051215_201639_761_fsp.sph -20051215_203848_762_fsp.sph -20051215_210410_764_fsp.sph -20051215_212456_766_fsp.sph -20051215_212701_767_fsp.sph -20051215_212749_768_fsp.sph -20051215_214814_769_fsp.sph -20051215_220537_770_fsp.sph -20051215_222306_771_fsp.sph -20051216_181042_773_fsp.sph -20051216_182340_774_fsp.sph -20051216_191101_775_fsp.sph -20051216_192823_776_fsp.sph -20051216_200153_777_fsp.sph -20051216_211423_778_fsp.sph -20051216_220626_779_fsp.sph -20051217_142547_780_fsp.sph -20051217_180231_781_fsp.sph -20051217_182026_783_fsp.sph -20051217_182330_784_fsp.sph -20051217_182530_785_fsp.sph -20051217_183115_786_fsp.sph -20051217_190226_787_fsp.sph -20051218_142845_790_fsp.sph -20051218_180353_791_fsp.sph -20051218_181751_792_fsp.sph -20051218_182127_793_fsp.sph -20051218_182750_794_fsp.sph -20051218_200401_799_fsp.sph -20051218_210249_800_fsp.sph -20051218_211820_801_fsp.sph -20051218_212444_802_fsp.sph -20051218_212813_803_fsp.sph -20051219_180225_804_fsp.sph -20051219_182110_806_fsp.sph -20051219_190625_808_fsp.sph -20051219_210655_812_fsp.sph -20051219_212218_813_fsp.sph -20051219_212716_814_fsp.sph -20051219_213203_815_fsp.sph -20051219_221213_816_fsp.sph -20051219_223123_817_fsp.sph -20051220_181731_820_fsp.sph -20051220_190121_821_fsp.sph -20051220_212400_826_fsp.sph -20051220_212718_828_fsp.sph -20051220_213420_829_fsp.sph -20051221_000417_830_fsp.sph -20051221_180958_831_fsp.sph -20051221_210452_840_fsp.sph -20051221_212325_841_fsp.sph -20051221_212911_842_fsp.sph -20051222_000436_843_fsp.sph -20051222_181242_845_fsp.sph -20051222_181506_846_fsp.sph -20051222_182617_847_fsp.sph -20051222_184209_849_fsp.sph -20051222_200553_850_fsp.sph -20051222_210309_852_fsp.sph -20051222_212425_855_fsp.sph -20051223_180346_856_fsp.sph -20051223_181050_857_fsp.sph -20051223_183105_860_fsp.sph -20051223_212547_863_fsp.sph -20051223_212853_864_fsp.sph -20051224_180302_865_fsp.sph -20051224_182949_867_fsp.sph -20051224_210150_870_fsp.sph -20051224_213010_871_fsp.sph -20051225_192042_872_fsp.sph -20051225_210556_873_fsp.sph -20051226_180908_874_fsp.sph -20051226_181659_875_fsp.sph -20051227_181058_885_fsp.sph -20051227_211308_887_fsp.sph -20051227_213029_888_fsp.sph -20051227_214843_889_fsp.sph -20051227_220309_890_fsp.sph -20051228_180249_891_fsp.sph -20051228_182051_892_fsp.sph -20051228_183955_893_fsp.sph -20051228_210524_896_fsp.sph -20051228_211808_897_fsp.sph -20051228_212304_899_fsp.sph -20051228_212734_900_fsp.sph -20051228_223227_901_fsp.sph -20051229_180231_902_fsp.sph -20051229_182614_906_fsp.sph 
-20051229_182631_907_fsp.sph -20051229_214024_909_fsp.sph -20051230_180457_910_fsp.sph -20051230_181721_912_fsp.sph -20051230_210412_913_fsp.sph -20051230_210559_914_fsp.sph -20051230_212557_915_fsp.sph -20051231_000808_916_fsp.sph -20060103_180314_917_fsp.sph -20060103_182107_918_fsp.sph -20060103_182257_919_fsp.sph -20060103_182549_920_fsp.sph -20060103_182654_921_fsp.sph -20060103_184037_922_fsp.sph -20060103_211504_925_fsp.sph -20060103_211732_926_fsp.sph -20060104_180509_928_fsp.sph -20060104_181040_929_fsp.sph -20060104_182115_930_fsp.sph -20060104_182644_931_fsp.sph -20060104_190448_933_fsp.sph -20060104_192707_934_fsp.sph -20060104_210223_935_fsp.sph -20060104_212844_936_fsp.sph -20060104_220148_937_fsp.sph -20060105_202127_943_fsp.sph -20060105_205957_944_fsp.sph -20060105_210951_945_fsp.sph -20060105_211743_946_fsp.sph -20060105_213129_947_fsp.sph -20060105_213243_948_fsp.sph -20060105_230711_949_fsp.sph -20060106_180202_950_fsp.sph -20060106_181040_951_fsp.sph -20060106_181726_952_fsp.sph -20060106_182909_953_fsp.sph -20060106_183056_954_fsp.sph -20060106_183550_955_fsp.sph -20060106_185224_956_fsp.sph -20060106_193129_957_fsp.sph -20060107_180634_960_fsp.sph -20060107_181553_961_fsp.sph -20060107_182715_962_fsp.sph -20060107_190206_963_fsp.sph -20060107_190415_964_fsp.sph -20060107_210435_966_fsp.sph -20060107_220739_967_fsp.sph -20060108_180630_968_fsp.sph -20060108_194731_971_fsp.sph -20060108_234917_976_fsp.sph -20060109_180448_977_fsp.sph -20060109_182557_979_fsp.sph -20060109_183636_980_fsp.sph -20060109_183727_981_fsp.sph -20060109_205815_982_fsp.sph -20060109_213409_986_fsp.sph -20060109_215138_987_fsp.sph -20060109_220315_988_fsp.sph -20060109_220535_989_fsp.sph -20060110_183405_995_fsp.sph -20060110_200611_998_fsp.sph -20060110_210730_1002_fsp.sph -20060110_213516_1004_fsp.sph -20060110_221920_1006_fsp.sph -20060110_230947_1007_fsp.sph -20060111_181650_1008_fsp.sph -20060111_182557_1009_fsp.sph -20060111_184916_1010_fsp.sph -20060111_192159_1012_fsp.sph -20060111_200345_1013_fsp.sph -20060111_210257_1014_fsp.sph -20060111_212145_1016_fsp.sph -20060111_213742_1017_fsp.sph -20060111_213936_1018_fsp.sph -20060111_230912_1020_fsp.sph -20060112_180639_1021_fsp.sph -20060112_182612_1022_fsp.sph -20060112_183346_1023_fsp.sph -20060112_183622_1024_fsp.sph -20060112_210747_1025_fsp.sph -20060112_211025_1026_fsp.sph -20060112_221010_1027_fsp.sph -20060112_221022_1028_fsp.sph -20060113_180159_1030_fsp.sph -20060113_183452_1033_fsp.sph -20060113_190403_1034_fsp.sph -20060113_213733_1036_fsp.sph -20060114_181137_1039_fsp.sph -20060114_181922_1040_fsp.sph -20060114_191056_1043_fsp.sph -20060114_213242_1044_fsp.sph -20060115_180421_1045_fsp.sph -20060115_183525_1047_fsp.sph -20060115_210217_1048_fsp.sph -20060115_212231_1051_fsp.sph -20060115_220504_1052_fsp.sph -20060115_232345_1053_fsp.sph -20060116_181908_1054_fsp.sph -20060116_182500_1055_fsp.sph -20060116_183201_1056_fsp.sph -20060116_184141_1057_fsp.sph -20060116_202324_1058_fsp.sph -20060116_204753_1059_fsp.sph -20060116_210217_1060_fsp.sph -20060116_211237_1061_fsp.sph -20060116_212845_1063_fsp.sph -20060116_220652_1064_fsp.sph -20060116_221118_1065_fsp.sph -20060117_181936_1068_fsp.sph -20060117_182604_1069_fsp.sph -20060117_185153_1071_fsp.sph -20060117_210138_1072_fsp.sph -20060117_210311_1073_fsp.sph -20060117_212546_1074_fsp.sph -20060118_180229_1076_fsp.sph -20060118_180647_1078_fsp.sph -20060118_182448_1079_fsp.sph -20060118_183010_1080_fsp.sph -20060118_190231_1082_fsp.sph -20060118_200148_1083_fsp.sph 
-20060118_205216_1084_fsp.sph -20060118_212907_1085_fsp.sph diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/test b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/test deleted file mode 100644 index 0cbc3cc95fd..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/test +++ /dev/null @@ -1,20 +0,0 @@ -sp_0053.sph -sp_0082.sph -sp_0084.sph -sp_0088.sph -sp_0681.sph -sp_0699.sph -sp_0776.sph -sp_0857.sph -sp_1031.sph -sp_1100.sph -sp_1148.sph -sp_1156.sph -sp_1186.sph -sp_1212.sph -sp_1345.sph -sp_1435.sph -sp_1578.sph -sp_1648.sph -sp_1807.sph -sp_1847.sph diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/train b/egs/fisher_callhome_spanish/s5_gigaword/local/splits/train deleted file mode 100644 index 2c936072534..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/splits/train +++ /dev/null @@ -1,80 +0,0 @@ -sp_0085.sph -sp_0096.sph -sp_0098.sph -sp_0100.sph -sp_0291.sph -sp_0713.sph -sp_0724.sph -sp_0726.sph -sp_0731.sph -sp_0733.sph -sp_0753.sph -sp_0788.sph -sp_0826.sph -sp_0831.sph -sp_0836.sph -sp_0841.sph -sp_0850.sph -sp_0855.sph -sp_0892.sph -sp_0899.sph -sp_0910.sph -sp_0917.sph -sp_0919.sph -sp_0923.sph -sp_0945.sph -sp_0950.sph -sp_0951.sph -sp_0992.sph -sp_0997.sph -sp_1013.sph -sp_1039.sph -sp_1044.sph -sp_1045.sph -sp_1058.sph -sp_1060.sph -sp_1063.sph -sp_1081.sph -sp_1106.sph -sp_1122.sph -sp_1140.sph -sp_1175.sph -sp_1195.sph -sp_1198.sph -sp_1231.sph -sp_1234.sph -sp_1255.sph -sp_1260.sph -sp_1261.sph -sp_1262.sph -sp_1264.sph -sp_1266.sph -sp_1273.sph -sp_1275.sph -sp_1284.sph -sp_1286.sph -sp_1304.sph -sp_1308.sph -sp_1333.sph -sp_1341.sph -sp_1353.sph -sp_1368.sph -sp_1379.sph -sp_1384.sph -sp_1449.sph -sp_1463.sph -sp_1574.sph -sp_1740.sph -sp_1759.sph -sp_1849.sph -sp_1908.sph -sp_1915.sph -sp_1918.sph -sp_1974.sph -sp_1976.sph -sp_1988.sph -sp_2000.sph -sp_2056.sph -sp_2070.sph -sp_2091.sph -sp_2101.sph diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/spron.pl b/egs/fisher_callhome_spanish/s5_gigaword/local/spron.pl deleted file mode 100755 index 03193384670..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/spron.pl +++ /dev/null @@ -1,304 +0,0 @@ -#!/usr/bin/env perl - -# Oct 21, 2015 : Gaurav Kumar (Johns Hopkins University) -# GNU General Public License, v3.0 -# -# This script was modified under GPL and is being distributed with -# Kaldi. It requires the preference and rule files -# (under LDC copyright) from LDC96L16. The main changes were -# - Outdated usage of perl conventions updated @_ => $_ or @A -# - This script no longer needs the preference and rule files to -# be in the same directory as this script. -# - Accepts tokens from instead of <> - -# --- Retained previous version information ---------------------------- -# spron.pl Version 0.1 Jan. 11 1995 -# Written by Zhibiao Wu, LDC, wzb@unagi.cis.upenn.edu -# This program needs the basic_rules file to run. The rules must be sorted -# in alphabetical order. The most specific rules should precede the more -# general ones. The conventions used in the basic rules are the same as -# regular expressions used in Perl. - -# Revised history: Feb. 10 1995 - -# The file "preferences" (assumed to be in your current directory) -# gives an "oracle" of correct pronunciations that override the -# machine-generated ones. - -# slightly changed 97/09/05 robertm: -# - look for basic_rules and preferences in $PWD instead of ~wzb/... 
-# - use next to shortcut loop instead of if/else -# - added a bit of documentation, without really trying to decipher this thing -# ----------------------------------------------------------------------- - -use utf8; -binmode(STDIN, ":utf8"); -binmode(STDOUT, ":utf8"); - -$vfile = ""; -$preference_file = ""; -$rules_file = ""; -$print_input = 0; -if ($#ARGV < 1) { - # Print Usage - print "Usage : local/spron.pl pref-file rules-file \n"; - exit 1; -} else { - $preference_file = $ARGV[0]; - $rules_file = $ARGV[1]; - if ($#ARGV > 1) { - $vfile = $ARGV[2]; - } - if ($#ARGV > 2) { - $print_input = 1; - } -} - -$rule_num = 0; -$previous = ""; -if ($vfile ne "") { - open(VF, $vfile) || die "Can't find file $vfile!\n"; - while () { - chop; - @A = split(//); - if (($A[0] ne '#') && ($_ ne "")) { - if (/(\S+)\s*->\s*(\S*)\s*:\s*(\S*)\s*__\s*(\S*)\s*(#?)/) { - $head[$rule_num] = $1; - $end[$rule_num] = $2; - $pre[$rule_num] = $3; - if ($4 =~ /#/) { - $nex[$rule_num] = ""; - $some[$rule_num] = $4; - } else { - $nex[$rule_num] = $4; - $some[$rule_num] = $5; - } - if ($previous ne substr($head[$rule_num],0,1)) { - $first{$head[$rule_num]} = $rule_num; - $last{$previous} = $rule_num - 1; - } - $previous = substr($head[$rule_num++],0,1); - } else { - print "Rule format error: Cannot parse $_\n"; - exit(1); - } - } - } - $last{$previous} = $rule_num - 1; - - close(VF); -} - -open(PF, $preference_file) || die "Can't read `preferences' file"; -binmode(PF, ":iso88591"); -while () { - chop; - if ($_ ne "") { - @A = split; - $pron{$A[0]} = $A[1]; - $stre{$A[0]} = $A[2]; - } -} - -$previous = ""; -$brule_num = 0; -open(BF, $rules_file) || die "Can't read `basic_rules' file"; -binmode(BF, ":iso88591"); -while () { - chop; - @A = split(//); - if (($A[0] ne '#') && ($_ ne "")) { - if (/(\S+)\s*->\s*(\S*)\s*:\s*(\S*)\s*__\s*(\S*)\s*(#?)/) { - $bhead[$brule_num] = $1; - $bend[$brule_num] = $2; - $bpre[$brule_num] = $3; - if ($4 =~ /#/) { - $bnex[$brule_num] = ""; - $bsome[$brule_num] = $4; - } else { - $bnex[$brule_num] = $4; - $bsome[$brule_num] = $5; - } - if ($previous ne substr($bhead[$brule_num],0,1)) { - $bfirst{substr($bhead[$brule_num],0,1)} = $brule_num; - $blast{$previous} = $brule_num - 1; - } - $previous = substr($bhead[$brule_num++],0,1); - } else { - print "Rule format error in file basic_rules: Cannot parse $_\n"; - exit(1); - } - } -} -$blast{$previous} = $brule_num - 1; -close(BF); - -if ($brule_num == 0) { - print "No basic rules, Program exit!\n"; - exit(1); -} - -while(){ - next if ((/^#/) || (/^\s*$/) ); - chop; - if ($print_input) { - print $_, "\t"; - } - if ($pron{$_}) { - # print answer from preferences and skip to next word - print "$pron{$_}\t$stre{$_}\n"; - next; - } - $original = $_; - tr/A-ZÁÉÍÓÚÏÜÑ/a-záéíóúïüñ/; - $orig = "#" . $_ . 
"#"; - - @l = (); - - push(@l,split("",$orig)); - - @pron = &transfer(1); - - foreach (@pron) { - $a = $_; - y/aeiouáéíóú//cd; - if ($_ eq "") { - print "#No stressable vowel in $original\n"; - } else { - s/[aeiou]/0/go; - s/[áéíóú]/1/go; - if (!/1/) { - if(length() == 1){ - s/\b./1/o; - } elsif($l[$#l - 1] =~ /[aeiouns]/o){ - s/00\b/10/o; - } else { - s/0\b/1/o; - } - } - - $a =~ s/á/a/g; - $a =~ s/é/e/g; - $a =~ s/í/i/g; - $a =~ s/ó/o/g; - $a =~ s/ú/u/g; - - print "$a\t$_\n"; - } - } -} - -sub transfer{ - local($_) = @_; - local(@p) = (); - local($s) = 0; - local($over) = 0; - local($i,$j,$k) = (0,0,0); - - if ($_ >= length($orig) - 1) { - push(@p, ""); - return(@p); - } else { - - if ($vfile ne "") { - for ($i= $first{substr($orig, $_, 1)}; - $i <= $last{substr($orig, $_, 1)} ; $i++) { - if (&matchv($_,$i)) { - $s = $_ + length($head[$i]); - foreach $w (&transfer($s)) { - push(@p, $end[$i] . $w); - if ($some[$i] ne "") { - $over = 0; - } else { - $over = 1; - } - } - } - } - } - - if ($over == 0 ) { - $i = $bfirst{substr($orig, $_, 1)}; - while (($i <= $blast{substr($orig, $_, 1)}) && ($over == 0)) { - if (&matchb($_,$i)) { - $over = 1; - $s = $_ + length($bhead[$i]); - foreach $w (&transfer($s)) { - push(@p, $bend[$i] . $w); - } - } - $i++; - } - if ($over == 0) { - $s = $_ + 1; - foreach $w (&transfer($s)) { - push(@p, substr($orig,$_,1) . $w); - } - } - } - - return(@p); - } -} - -sub matchv { - $h = $head[$_[1]]; - $p = $pre[$_[1]]; - $n = $nex[$_[1]]; - - return(&match($_[0],$h,$p,$n)); - -} - -sub matchb { - $h = $bhead[$_[1]]; - $p = $bpre[$_[1]]; - $n = $bnex[$_[1]]; - - return(&match($_[0],$h,$p,$n)); - -} - -sub match { - - if (substr($orig, $_[0], length($_[1])) eq $_[1]) { - return ( &match_n($_[0] + length($_[1]) - 1, $_[3]) && - &match_p($_[0], $_[2])); - } else { - return (0); - } -} - -sub match_p { - local($a) = $_[0]; - local($b) = $_[1]; - local($_); - - if ($b eq "" ) { - return (1); - } else { - $_ = substr($orig, 0, $a) . "!"; - if (/($b)!/) { - return(1); - } else { - return(0); - } - } -} - -sub match_n { - local($a) = $_[0]; - local($b) = $_[1]; - local($_); - - if ($b eq "" ) { - return (1); - } else { - $_ = "!" . substr($orig, $a + 1, length($orig) - $a - 1); - if (/!($b)/) { - return(1); - } else { - return(0); - } - } -} diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/subset_data_prep.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/subset_data_prep.sh deleted file mode 100755 index 9f5855d56c4..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/subset_data_prep.sh +++ /dev/null @@ -1,164 +0,0 @@ -#!/bin/bash -# -# Copyright 2014 Gaurav Kumar. Apache 2.0 -# The input is a subset of the dataset in use. (*.sph files) -# In addition the transcripts are needed as well. -# This script is only called internally and should not be -# used for any other purpose. A similar script for general usage -# is local/fsp_data_prep.sh -# To be run from one directory above this script. - -stage=0 - -export LC_ALL=C - - -if [ $# -lt 4 ]; then - echo "Arguments should be the location of the Spanish Fisher Speech and Transcript Directories and the name of this partition -, and a list of files that belong to this partition . see ../run.sh for example." - exit 1; -fi - -subset=$3 -dir=`pwd`/data/local/$subset/data -mkdir -p $dir -local=`pwd`/local -utils=`pwd`/utils -tmpdir=`pwd`/data/local/tmp -mkdir -p $tmpdir - -. 
./path.sh || exit 1; # Needed for KALDI_ROOT -export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin -sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe -if [ ! -x $sph2pipe ]; then - echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; - exit 1; -fi -cd $dir - -# Make directory of links to the WSJ disks such as 11-13.1. This relies on the command -# line arguments being absolute pathnames. -rm -r links/ 2>/dev/null -mkdir links/ -mkdir links/speech -mkdir links/transcripts -if [ ! -f $4 ]; then - echo "Please specify a valid parition file. Could not find $4" - exit 1; -fi -cat $4 | sed 's:.*/::g' | \ -xargs -I % find $1/ -name %* | xargs -I % echo cp % links/ - -# Basic spot checks to see if we got the data that we needed -if [ ! -d links/LDC2010S01 -o ! -d links/LDC2010T04 ]; -then - echo "The speech and the data directories need to be named LDC2010S01 and LDC2010T04 respecti -vely" - exit 1; -fi - -if [ ! -d links/LDC2010S01/DISC1/data/speech -o ! -d links/LDC2010S01/DISC2/data/speech ]; -then - echo "Disc 1 and 2 directories missing or not properly organised within the speech data dir" - echo "Typical format is LDC2010S01/DISC?/data/speech" - exit 1; -fi - -#Check the transcripts directories as well to see if they exist -if [ ! -d links/LDC2010T04/data/transcripts ]; -then - echo "Transcript directories missing or not properly organised" - echo "Typical format is LDC2010T04/data/transcripts" - exit 1; -fi - -speech_d1=$dir/links/LDC2010S01/DISC1/data/speech -speech_d2=$dir/links/LDC2010S01/DISC2/data/speech -transcripts=$dir/links/LDC2010T04/data/transcripts - -fcount_d1=`find ${speech_d1} -iname '*.sph' | wc -l` -fcount_d2=`find ${speech_d2} -iname '*.sph' | wc -l` -fcount_t=`find ${transcripts} -iname '*.tdf' | wc -l` -#TODO:it seems like not all speech files have transcripts -#Now check if we got all the files that we needed -if [ $fcount_d1 != 411 -o $fcount_d2 != 408 -o $fcount_t != 819 ]; -then - echo "Incorrect number of files in the data directories" - echo "DISC1 and DISC2 should contain 411 and 408 .sph files respectively" - echo "The transcripts should contain 819 files" - exit 1; -fi - -if [ $stage -le 0 ]; then - #Gather all the speech files together to create a file list - #TODO: Train and test split might be required - ( - find $speech_d1 -iname '*.sph'; - find $speech_d2 -iname '*.sph'; - ) > $tmpdir/train_sph.flist - - #Get all the transcripts in one place - find $transcripts -iname '*.tdf' > $tmpdir/train_transcripts.flist -fi - -if [ $stage -le 1 ]; then - $local/fsp_make_trans.pl $tmpdir - mkdir -p $dir/train_all - mv $tmpdir/reco2file_and_channel $dir/train_all/ -fi - -if [ $stage -le 2 ]; then - sort $tmpdir/text.1 | grep -v '((' | \ - awk '{if (NF > 1){ print; }}' | \ - sed 's:<\s*[/]*\s*\s*for[ei][ei]g[nh]\s*\w*>::g' | \ - sed 's:\([^<]*\)<\/lname>:\1:g' | \ - sed 's:::g' | \ - sed 's:[^<]*<\/laugh>:[laughter]:g' | \ - sed 's:<\s*cough[\/]*>:[noise]:g' | \ - sed 's::[noise]:g' | \ - sed 's::[noise]:g' | \ - sed 's::[noise]:g' | \ - sed 's:[^<]*<\/background>:[noise]:g' | \ - sed -r 's:<[/]?background[/]?>:[noise]:g' | \ - #One more time to take care of nested stuff - sed 's:[^<]*<\/laugh>:[laughter]:g' | \ - sed -r 's:<[/]?laugh[/]?>:[laughter]:g' | \ - #now handle the exceptions, find a cleaner way to do this? - sed 's:::g' | \ - sed 's:::g' | \ - sed 's:foreign>::g' | \ - sed 's:>::g' | \ - #How do you handle numbers? 
- grep -v '()' | \ - #Now go after the non-printable characters - sed -r 's:¿::g' > $tmpdir/text.2 - cp $tmpdir/text.2 $dir/train_all/text - - #Create segments file and utt2spk file - ! cat $dir/train_all/text | perl -ane 'm:([^-]+)-([AB])-(\S+): || die "Bad line $_;"; print "$1-$2-$3 $1-$2\n"; ' > $dir/train_all/utt2spk \ - && echo "Error producing utt2spk file" && exit 1; - - cat $dir/train_all/text | perl -ane 'm:((\S+-[AB])-(\d+)-(\d+))\s: || die; $utt = $1; $reco = $2; - $s = sprintf("%.2f", 0.01*$3); $e = sprintf("%.2f", 0.01*$4); print "$utt $reco $s $e\n"; ' >$dir/train_all/segments - - $utils/utt2spk_to_spk2utt.pl <$dir/train_all/utt2spk > $dir/train_all/spk2utt -fi - -if [ $stage -le 3 ]; then - cat $tmpdir/train_sph.flist | perl -ane 'm:/([^/]+)\.sph$: || die "bad line $_; "; print "$1 $_"; ' > $tmpdir/sph.scp - cat $tmpdir/sph.scp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2);}' | \ - sort -k1,1 -u > $dir/train_all/wav.scp || exit 1; -fi - -if [ $stage -le 4 ]; then - # Build the speaker to gender map, the temporary file with the speaker in gender information is already created by fsp_make_trans.pl. - cat $tmpdir/spk2gendertmp | sort | uniq > $dir/train_all/spk2gender -fi - -echo "Fisher Spanish Data preparation succeeded." - -exit 1; - diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/train_get_1_best.py b/egs/fisher_callhome_spanish/s5_gigaword/local/train_get_1_best.py deleted file mode 100755 index ce83fa8c8aa..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/train_get_1_best.py +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/env python -# Copyright 2014 Gaurav Kumar. Apache 2.0 - -import os -import sys - -files = [ -open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-1/exp/tri5a/decode_test/scoring/13.tra'), -open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-2/exp/tri5a/decode_test/scoring/13.tra'), -open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-3/exp/tri5a/decode_test/scoring/13.tra'), -open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-4/exp/tri5a/decode_test/scoring/13.tra'), -open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-5/exp/tri5a/decode_test/scoring/13.tra'), -open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-6/exp/tri5a/decode_test/scoring/13.tra'), -open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-7/exp/tri5a/decode_test/scoring/13.tra'), -open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-8/exp/tri5a/decode_test/scoring/13.tra'), -open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-9/exp/tri5a/decode_test/scoring/13.tra'), -open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-10/exp/tri5a/decode_test/scoring/13.tra')] - -def findTranscription(timeDetail): - - for file1 in files: - file1.seek(0,0) - for line in file1: - lineComp = line.split() - if lineComp[0] == timeDetail: - return " ".join(lineComp[1:]) - # No result found - return -1 - - -wordsFile = open('exp/tri5a/graph/words.txt') -words = {} - -# Extract word list -for line in wordsFile: - lineComp = line.split() - words[int(lineComp[1])] = lineComp[0].strip() - -# Now read list of files in conversations -fileList = [] -#conversationList = open('/export/a04/gkumar/corpora/fishcall/joshkal-splits/provisional_dev') -conversationList = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/train') -for line in conversationList: - line = line.strip() - line = line[:-4] - fileList.append(line) - -# IN what order were the conversations 
added to the spanish files? -# TODO: Make sure they match the order in which these english files are being written - -# Now get timing information to concatenate the ASR outputs -if not os.path.exists('exp/tri5a/one-best/train'): - os.makedirs('exp/tri5a/one-best/train') - -#provFile = open('/export/a04/gkumar/corpora/fishcall/fisher_provisional_dev.es', 'w+') -provFile = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/asr.train', 'w+') -for item in fileList: - timingFile = open('/export/a04/gkumar/corpora/fishcall/fisher/tim/' + item + '.es') - newFile = open('exp/tri5a/one-best/train/' + item + '.es', 'w+') - for line in timingFile: - timeInfo = line.split() - mergedTranslation = "" - for timeDetail in timeInfo: - #Locate this in ASR dev/test, this is going to be very slow - tmp = findTranscription(timeDetail) - if tmp != -1: - mergedTranslation = mergedTranslation + " " + tmp - mergedTranslation = mergedTranslation.strip() - transWords = [words[int(x)] for x in mergedTranslation.split()] - newFile.write(" ".join(transWords) + "\n") - provFile.write(" ".join(transWords) + "\n") - newFile.close() -provFile.close() - - - - - - diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/train_get_lattices.py b/egs/fisher_callhome_spanish/s5_gigaword/local/train_get_lattices.py deleted file mode 100755 index b9f906b27da..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/train_get_lattices.py +++ /dev/null @@ -1,125 +0,0 @@ -#!/usr/bin/env python -# Copyright 2014 Gaurav Kumar. Apache 2.0 - -from __future__ import print_function -import os -import sys -import subprocess - -latticeLocation = {1:"/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-1/latjosh-2/lattices-pushed/", -2:"/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-2/latjosh-2/lattices-pushed/", -3:"/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-3/latjosh-2/lattices-pushed/", -4:"/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-4/latjosh-2/lattices-pushed/", -5:"/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-5/latjosh-2/lattices-pushed/", -6:"/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-6/latjosh-2/lattices-pushed/", -7:"/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-7/latjosh-2/lattices-pushed/", -8:"/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-8/latjosh-2/lattices-pushed/", -9:"/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-9/latjosh-2/lattices-pushed/", -10:"/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-10/latjosh-2/lattices-pushed/"} - -latticeDict = {} - -for key,location in latticeLocation.items(): - for root, dirs, filenames in os.walk(location): - for f in filenames: - latticeDict[f] = str(key) - -tmpdir = 'data/local/data/tmp/lattmp' -if not os.path.exists(tmpdir): - os.makedirs(tmpdir) -invalidplfdir = 'data/local/data/tmp/invalidplf' -if not os.path.exists(invalidplfdir): - os.makedirs(invalidplfdir) -else: - os.system("rm " + invalidplfdir + "/*") - -def latticeConcatenate(lat1, lat2): - ''' - Concatenates lattices, writes temporary results to tmpdir - ''' - if lat1 == "": - if os.path.exists('rm ' + tmpdir + '/tmp.lat'): - os.system('rm ' + tmpdir + '/tmp.lat') - return lat2 - else: - proc = subprocess.Popen(['fstconcat', lat1, lat2, (tmpdir + '/tmp.lat')]) - proc.wait() - return tmpdir + '/tmp.lat' - - -def findLattice(timeDetail): - ''' - Finds the lattice corresponding to a time segment - ''' - searchKey = timeDetail + '.lat' - if searchKey in latticeDict: - return "/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-" + latticeDict[searchKey] + 
"/latjosh-2/lattices-pushed/" + searchKey - else: - return -1 - - -# Now read list of files in conversations -fileList = [] -conversationList = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/train') -for line in conversationList: - line = line.strip() - line = line[:-4] - fileList.append(line) - -# IN what order were the conversations added to the spanish files? -# Now get timing information to concatenate the ASR outputs - -provFile = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/asr.train.plf', 'w+') -lineNo = 1 -invalidPLF = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/invalidPLF', 'w+') -blankPLF = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/blankPLF', 'w+') -rmLines = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/removeLines', 'w+') -for item in fileList: - timingFile = open('/export/a04/gkumar/corpora/fishcall/fisher/tim/' + item + '.es') - for line in timingFile: - timeInfo = line.split() - - # For utterances that are concatenated in the translation file, - # the corresponding FSTs have to be translated as well - mergedTranslation = "" - for timeDetail in timeInfo: - tmp = findLattice(timeDetail) - if tmp != -1: - # Concatenate lattices - mergedTranslation = latticeConcatenate(mergedTranslation, tmp) - - if mergedTranslation != "": - - # Sanjeev's Recipe : Remove epsilons and topo sort - finalFST = tmpdir + "/final.fst" - os.system("fstrmepsilon " + mergedTranslation + " | fsttopsort - " + finalFST) - - # Now convert to PLF - proc = subprocess.Popen('/export/a04/gkumar/corpora/fishcall/bin/fsm2plf.sh /export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-matt/data/lang/words.clean.txt ' + finalFST, stdout=subprocess.PIPE, shell=True) - PLFline = proc.stdout.readline() - finalPLFFile = tmpdir + "/final.plf" - finalPLF = open(finalPLFFile, "w+") - finalPLF.write(PLFline) - finalPLF.close() - - # now check if this is a valid PLF, if not write it's ID in a - # file so it can be checked later - proc = subprocess.Popen("/export/a04/gkumar/moses/mosesdecoder/checkplf < " + finalPLFFile + " 2>&1 | awk 'FNR == 2 {print}'", stdout=subprocess.PIPE, shell=True) - line = proc.stdout.readline() - print("{} {}".format(line, lineNo)) - if line.strip() != "PLF format appears to be correct.": - os.system("cp " + finalFST + " " + invalidplfdir + "/" + timeInfo[0]) - invalidPLF.write(invalidplfdir + "/" + timeInfo[0] + "\n") - rmLines.write("{}\n".format(lineNo)) - else: - provFile.write(PLFline) - else: - blankPLF.write(timeInfo[0] + "\n") - rmLines.write("{}\n".format(lineNo)) - # Now convert to PLF - lineNo += 1 - -provFile.close() -invalidPLF.close() -blankPLF.close() -rmLines.close() diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh b/egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh deleted file mode 100755 index b8b3ca35ef9..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/train_pocolm.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash - -stage=-2 -num_words_pocolm=110000 -prune_size=1000000 - -. ./path.sh -. ./cmd.sh -. ./utils/parse_options.sh - -set -euo pipefail - -export POCOLM_ROOT=$(cd $KALDI_ROOT/tools/pocolm/; pwd -P) -export PATH=$PATH:$POCOLM_ROOT/scripts - -textdir=$1 -pocolm_dir=$2 - - -if [ $stage -le -2 ]; then - echo "****" - echo " POCOLM experiment : Running STAGE 1 : 2-gram Pocolm general closed vocabulary model" - echo " Will estimate the metaparams to be used as unigram weights for stage 2 ....." 
- echo "****" - if [ -e "$textdir"/unigram_weights ]; then - rm "$textdir"/unigram_weights - fi - if [ -e "$pocolm_dir" ]; then - rm -r "$pocolm_dir" - fi - - bash local/pocolm_cust.sh --num-word 0 --ngram-order 2 --pocolm-stage 1 --lm-dir "$pocolm_dir"/lm \ - --arpa-dir "$pocolm_dir"/arpa --textdir "$textdir" - -fi - -if [ $stage -le -1 ];then - echo "********" - echo "POCOLM experiment : RUNNING STAGE 2 : 3gram POCOLM using unigram wts estimates in 1st stage....." - echo "********" - - echo " " > "$pocolm_dir"/lm/work/.unigram_weights.done - python local/get_unigram_weights_vocab.py "$pocolm_dir"/lm/0_2.pocolm/ "$textdir"/unigram_weights - bash local/pocolm_cust.sh --num-word "$num_words_pocolm" --lm-dir "$pocolm_dir"/lm \ - --arpa-dir "$pocolm_dir"/arpa --textdir "$textdir" - prune_lm_dir.py --target-num-ngrams=$prune_size "$pocolm_dir"/lm/"$num_words_pocolm"_3.pocolm \ - "$pocolm_dir"/lm/"$num_words_pocolm"_3.pocolm_pruned_"$prune_size" - mkdir -p "$pocolm_dir"/arpa - format_arpa_lm.py "$pocolm_dir"/lm/"$num_words_pocolm"_3.pocolm_pruned_"$prune_size" | \ - gzip -c > "$pocolm_dir"/arpa/"$num_words_pocolm"_3_pruned_"$prune_size".arpa.gz -fi - - -exit 0; diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/train_process_oracle.py b/egs/fisher_callhome_spanish/s5_gigaword/local/train_process_oracle.py deleted file mode 100755 index 3f6444da294..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/train_process_oracle.py +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/env python -# Copyright 2014 Gaurav Kumar. Apache 2.0 - -import os -import sys - -files = [ -open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-1/exp/tri5a/decode_test/oracle/oracle.tra'), -open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-2/exp/tri5a/decode_test/oracle/oracle.tra'), -open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-3/exp/tri5a/decode_test/oracle/oracle.tra'), -open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-4/exp/tri5a/decode_test/oracle/oracle.tra'), -open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-5/exp/tri5a/decode_test/oracle/oracle.tra'), -open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-6/exp/tri5a/decode_test/oracle/oracle.tra'), -open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-7/exp/tri5a/decode_test/oracle/oracle.tra'), -open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-8/exp/tri5a/decode_test/oracle/oracle.tra'), -open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-9/exp/tri5a/decode_test/oracle/oracle.tra'), -open('/export/a04/gkumar/kaldi-trunk/egs/fishcall_es/j-10/exp/tri5a/decode_test/oracle/oracle.tra')] - -def findTranscription(timeDetail): - - for file1 in files: - file1.seek(0,0) - for line in file1: - lineComp = line.split() - if lineComp[0] == timeDetail: - return " ".join(lineComp[1:]) - # No result found - return -1 - - -wordsFile = open('exp/tri5a/graph/words.txt') -words = {} - -# Extract word list -for line in wordsFile: - lineComp = line.split() - words[int(lineComp[1])] = lineComp[0].strip() - -# Now read list of files in conversations -fileList = [] -#conversationList = open('/export/a04/gkumar/corpora/fishcall/joshkal-splits/provisional_dev') -conversationList = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/train') -for line in conversationList: - line = line.strip() - line = line[:-4] - fileList.append(line) - -# IN what order were the conversations added to the spanish files? 
-# TODO: Make sure they match the order in which these english files are being written - -# Now get timing information to concatenate the ASR outputs -if not os.path.exists('exp/tri5a/one-best/train'): - os.makedirs('exp/tri5a/one-best/train') - -#provFile = open('/export/a04/gkumar/corpora/fishcall/fisher_provisional_dev.es', 'w+') -provFile = open('/export/a04/gkumar/corpora/fishcall/jack-splits/split-matt/asr.train.oracle', 'w+') -for item in fileList: - timingFile = open('/export/a04/gkumar/corpora/fishcall/fisher/tim/' + item + '.es') - newFile = open('exp/tri5a/one-best/train/' + item + '.es', 'w+') - for line in timingFile: - timeInfo = line.split() - mergedTranslation = "" - for timeDetail in timeInfo: - #Locate this in ASR dev/test, this is going to be very slow - tmp = findTranscription(timeDetail) - if tmp != -1: - mergedTranslation = mergedTranslation + " " + tmp - mergedTranslation = mergedTranslation.strip() - transWords = [words[int(x)] for x in mergedTranslation.split()] - newFile.write(" ".join(transWords) + "\n") - provFile.write(" ".join(transWords) + "\n") - newFile.close() -provFile.close() - - - - - - diff --git a/egs/fisher_callhome_spanish/s5_gigaword/local/wer_output_filter b/egs/fisher_callhome_spanish/s5_gigaword/local/wer_output_filter deleted file mode 100755 index 4fce42945b3..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/local/wer_output_filter +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sed -f -s:\[laughter\]::g -s:\[noise\]::g -s:\[oov\]::g -s:::g diff --git a/egs/fisher_callhome_spanish/s5_gigaword/path.sh b/egs/fisher_callhome_spanish/s5_gigaword/path.sh deleted file mode 100755 index 2993311fd90..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/path.sh +++ /dev/null @@ -1,13 +0,0 @@ -export KALDI_ROOT=`pwd`/../../../ -[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh -export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH -[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 -. $KALDI_ROOT/tools/config/common_path.sh -export LD_LIBRARY_PATH=/home/dpovey/libs - -export SPARROWHAWK_ROOT=$KALDI_ROOT/tools/sparrowhawk -export PATH=$SPARROWHAWK_ROOT/bin:$PATH -export LC_ALL=C -export LANG=C - -source ~/anaconda/bin/activate py36 diff --git a/egs/fisher_callhome_spanish/s5_gigaword/rnnlm b/egs/fisher_callhome_spanish/s5_gigaword/rnnlm deleted file mode 120000 index fb754622d5e..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/rnnlm +++ /dev/null @@ -1 +0,0 @@ -../../wsj/s5/rnnlm \ No newline at end of file diff --git a/egs/fisher_callhome_spanish/s5_gigaword/run.sh b/egs/fisher_callhome_spanish/s5_gigaword/run.sh deleted file mode 100755 index 95425c29034..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/run.sh +++ /dev/null @@ -1,310 +0,0 @@ -#!/bin/bash -# -# Copyright 2018 Nagendra Goel, Saikiran Valluri Apache 2.0 -# Copyright 2014 Gaurav Kumar. Apache 2.0 -# Recipe for Fisher/Callhome-Spanish - -stage=-1 -lmstage=-2 -train_rnnlm=false -start_textcleanup=false # WARNING : IT starts from flattening gigaword corpus to preparing text folder. - # If you already have the normalised gigword text somewhere, you can bypass the - # time consuming text cleanup (~1 week) by setting this option false. -addtraintext=true # If true, this option appends the Fisher train text to the Gigaword corpus textfile, to - # perform the A, A + G, Dev type POCOLM training configuration. 
- # A=fsp train, G=gigword text, -num_words_pocolm=110000 -train_sgmm2=false - -# call the next line with the directory where the Spanish Fisher data is -# (the values below are just an example). -sfisher_speech=/export/corpora/LDC/LDC2010S01 -sfisher_transcripts=/export/c03/svalluri//LDC2010T04 -spanish_lexicon=/export/corpora/LDC/LDC96L16 -split=local/splits/split_fisher - -callhome_speech=/export/corpora/LDC/LDC96S35 -callhome_transcripts=/export/corpora/LDC/LDC96T17 -split_callhome=local/splits/split_callhome - -gigaword_datapath=/export/c03/svalluri/Spanish_gigaword/data -rnnlm_workdir=workdir_rnnlm_Spanish_08032019 -mfccdir=`pwd`/mfcc - -. ./cmd.sh -if [ -f path.sh ]; then . ./path.sh; fi -. parse_options.sh || exit 1; - -set -eou pipefail - -if [ $stage -le -1 ]; then - local/fsp_data_prep.sh $sfisher_speech $sfisher_transcripts - - local/callhome_data_prep.sh $callhome_speech $callhome_transcripts - - # The lexicon is created using the LDC spanish lexicon, the words from the - # fisher spanish corpus. Additional (most frequent) words are added from the - # ES gigaword corpus to bring the total to 64k words. The ES frequency sorted - # wordlist is downloaded if it is not available. - local/fsp_prepare_dict.sh $spanish_lexicon - # Let's keep the original dict copy for G2P training - cp -r data/local/dict data/local/dict_orig - ( - steps/dict/train_g2p_seq2seq.sh data/local/dict_orig/lexicon.txt exp/g2p || touch exp/g2p/.error - ) & - - # Added c,j, v to the non silences phones manually - utils/prepare_lang.sh data/local/dict_orig "" data/local/lang_orig data/lang_orig - - utils/fix_data_dir.sh data/local/data/train_all - - steps/make_mfcc.sh --nj 20 --cmd "$train_cmd" data/local/data/train_all exp/make_mfcc/train_all $mfccdir || exit 1; - - utils/fix_data_dir.sh data/local/data/train_all - utils/validate_data_dir.sh data/local/data/train_all - - cp -r data/local/data/train_all data/train_all - - # For the CALLHOME corpus - utils/fix_data_dir.sh data/local/data/callhome_train_all - - steps/make_mfcc.sh --nj 20 --cmd "$train_cmd" data/local/data/callhome_train_all exp/make_mfcc/callhome_train_all $mfccdir || exit 1; - - utils/fix_data_dir.sh data/local/data/callhome_train_all - utils/validate_data_dir.sh data/local/data/callhome_train_all - - cp -r data/local/data/callhome_train_all data/callhome_train_all - - local/create_splits.sh $split - local/callhome_create_splits.sh $split_callhome - -fi - -if $start_textcleanup; then - echo "WARNING : Starting from cleaning up and normalizing the Gigword text" - echo " This might take few days........... You can opt out this stage " - echo " by setting start_textcleanup=false, and having text_lm ready inside rnnlm_workdir." - - if [ $stage -le 0 ]; then - mkdir -p "$rnnlm_workdir"/gigaword_rawtext - local/flatten_gigaword/flatten_all_gigaword.sh "$gigaword_datapath" "$rnnlm_workdir"/flattened_gigaword_corpus 24 - cat "$rnnlm_workdir"/flattened_gigaword_corpus/*.flat > "$rnnlm_workdir"/gigaword_rawtext/in.txt - local/clean_txt_dir.sh "$rnnlm_workdir"/gigaword_rawtext/ \ - "$rnnlm_workdir"/normalised_gigaword_corpus/ - mkdir -p "$rnnlm_workdir"/text_lm - cut -d " " -f 2- data/train/text > "$rnnlm_workdir"/text_lm/train.txt - cut -d " " -f 2- data/dev2/text > "$rnnlm_workdir"/text_lm/dev.txt # For RNNLM and POCOLM training we use dev2/text as dev file. 
- cp "$rnnlm_workdir"/normalised_gigaword_corpus/text_normalized "$rnnlm_workdir"/text_lm/spanish_gigaword_normalised.txt - if $addtraintext; then - cat "$rnnlm_workdir"/text_lm/train.txt >> "$rnnlm_workdir"/text_lm/spanish_gigaword_normalised.txt - fi - fi -fi - -if [ $stage -le 1 ]; then - local/train_pocolm.sh --stage $lmstage --num-words-pocolm $num_words_pocolm "$rnnlm_workdir"/text_lm/ "$rnnlm_workdir"/pocolm - local/get_rnnlm_wordlist.py data/lang_orig/words.txt "$rnnlm_workdir"/pocolm/lm/"$num_words_pocolm"_3.pocolm/words.txt \ - "$rnnlm_workdir"/rnnlm_wordlist "$rnnlm_workdir"/oov_pocolmwords - if $train_rnnlm; then - local/rnnlm.sh --stage $lmstage --dir "$rnnlm_workdir"/rnnlm --pocolm-dir "$rnnlm_workdir"/pocolm/lm/"$num_words_pocolm"_3.pocolm \ - --wordslist "$rnnlm_workdir"/rnnlm_wordlist --text-dir "$rnnlm_workdir"/text_lm - fi -fi - - -if [ $stage -le 2 ]; then - wait # wait till G2P training finishes - if [ -f exp/g2p/.error ]; then - rm exp/g2p/.error || true - echo "Fail to train the G2P model." && exit 1; - fi - steps/dict/apply_g2p_seq2seq.sh "$rnnlm_workdir"/oov_pocolmwords exp/g2p "$rnnlm_workdir"/oov_g2p.lex - cat "$rnnlm_workdir"/oov_g2p.lex/lexicon.lex data/local/dict/lexicon.txt | sed "/^$/d" |sort | uniq > "$rnnlm_workdir"/lexicon_extended.txt - cp "$rnnlm_workdir"/lexicon_extended.txt data/local/dict/lexicon.txt # Replacing original lexicon with extended version. - - utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang - - # Make sure that you do not use your test and your dev sets to train the LM - # Some form of cross validation is possible where you decode your dev/set based on an - # LM that is trained on everything but that that conversation - # When in doubt about what your data partitions should be use local/fsp_ideal_data_partitions.pl - # to get the numbers. Depending on your needs, you might have to change the size of - # the splits within that file. The default paritions are based on the Kaldi + Joshua - # requirements which means that I have very large dev and test sets - local/fsp_train_lms.sh $split - local/fsp_create_test_lang.sh - - # Now compute CMVN stats for the train, dev and test subsets - steps/compute_cmvn_stats.sh data/dev exp/make_mfcc/dev $mfccdir - steps/compute_cmvn_stats.sh data/test exp/make_mfcc/test $mfccdir - steps/compute_cmvn_stats.sh data/dev2 exp/make_mfcc/dev2 $mfccdir - #steps/compute_cmvn_stats.sh data/mt_train exp/make_mfcc/mt_train $mfccdir - #steps/compute_cmvn_stats.sh data/mt_test exp/make_mfcc/mt_test $mfccdir - - #n=$[`cat data/train_all/segments | wc -l` - 158126] - #utils/subset_data_dir.sh --last data/train_all $n data/train - steps/compute_cmvn_stats.sh data/train exp/make_mfcc/train $mfccdir - - steps/compute_cmvn_stats.sh data/callhome_dev exp/make_mfcc/callhome_dev $mfccdir - steps/compute_cmvn_stats.sh data/callhome_test exp/make_mfcc/callhome_test $mfccdir - steps/compute_cmvn_stats.sh data/callhome_train exp/make_mfcc/callhome_train $mfccdir - - # Again from Dan's recipe : Reduced monophone training data - # Now-- there are 1.6 million utterances, and we want to start the monophone training - # on relatively short utterances (easier to align), but not only the very shortest - # ones (mostly uh-huh). So take the 100k shortest ones, and then take 10k random - # utterances from those. 
- - utils/subset_data_dir.sh --shortest data/train 90000 data/train_100kshort - utils/subset_data_dir.sh data/train_100kshort 10000 data/train_10k - utils/data/remove_dup_utts.sh 100 data/train_10k data/train_10k_nodup - utils/subset_data_dir.sh --speakers data/train 30000 data/train_30k - utils/subset_data_dir.sh --speakers data/train 90000 data/train_100k -fi - -if [ $stage -le 3 ]; then - steps/train_mono.sh --nj 10 --cmd "$train_cmd" \ - data/train_10k_nodup data/lang exp/mono0a - - steps/align_si.sh --nj 30 --cmd "$train_cmd" \ - data/train_30k data/lang exp/mono0a exp/mono0a_ali || exit 1; - - steps/train_deltas.sh --cmd "$train_cmd" \ - 2500 20000 data/train_30k data/lang exp/mono0a_ali exp/tri1 || exit 1; - - - (utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph - steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri1/graph data/dev exp/tri1/decode_dev)& - - steps/align_si.sh --nj 30 --cmd "$train_cmd" \ - data/train_30k data/lang exp/tri1 exp/tri1_ali || exit 1; - - steps/train_deltas.sh --cmd "$train_cmd" \ - 2500 20000 data/train_30k data/lang exp/tri1_ali exp/tri2 || exit 1; - - ( - utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph || exit 1; - steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri2/graph data/dev exp/tri2/decode_dev || exit 1; - )& -fi - -if [ $stage -le 4 ]; then - steps/align_si.sh --nj 30 --cmd "$train_cmd" \ - data/train_100k data/lang exp/tri2 exp/tri2_ali || exit 1; - -# Train tri3a, which is LDA+MLLT, on 100k data. - steps/train_lda_mllt.sh --cmd "$train_cmd" \ - --splice-opts "--left-context=3 --right-context=3" \ - 3000 40000 data/train_100k data/lang exp/tri2_ali exp/tri3a || exit 1; - ( - utils/mkgraph.sh data/lang_test exp/tri3a exp/tri3a/graph || exit 1; - steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri3a/graph data/dev exp/tri3a/decode_dev || exit 1; - )& -fi - -if [ $stage -le 5 ]; then -# Next we'll use fMLLR and train with SAT (i.e. 
on -# fMLLR features) - steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ - data/train_100k data/lang exp/tri3a exp/tri3a_ali || exit 1; - - steps/train_sat.sh --cmd "$train_cmd" \ - 4000 60000 data/train_100k data/lang exp/tri3a_ali exp/tri4a || exit 1; - - ( - utils/mkgraph.sh data/lang_test exp/tri4a exp/tri4a/graph - steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri4a/graph data/dev exp/tri4a/decode_dev -)& - - - steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ - data/train data/lang exp/tri4a exp/tri4a_ali || exit 1; - -# Reduce the number of gaussians - steps/train_sat.sh --cmd "$train_cmd" \ - 5000 120000 data/train data/lang exp/tri4a_ali exp/tri5a || exit 1; - - ( - utils/mkgraph.sh data/lang_test exp/tri5a exp/tri5a/graph - steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri5a/graph data/dev exp/tri5a/decode_dev - steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri5a/graph data/test exp/tri5a/decode_test - - # Decode CALLHOME - steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri5a/graph data/callhome_test exp/tri5a/decode_callhome_test - steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri5a/graph data/callhome_dev exp/tri5a/decode_callhome_dev - steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri5a/graph data/callhome_train exp/tri5a/decode_callhome_train - ) & - - - steps/align_fmllr.sh \ - --boost-silence 0.5 --nj 32 --cmd "$train_cmd" \ - data/train data/lang exp/tri5a exp/tri5a_ali -fi - -if $train_sgmm2; then - -steps/train_ubm.sh \ - --cmd "$train_cmd" 750 \ - data/train data/lang exp/tri5a_ali exp/ubm5 - -steps/train_sgmm2.sh \ - --cmd "$train_cmd" 5000 18000 \ - data/train data/lang exp/tri5a_ali exp/ubm5/final.ubm exp/sgmm5 - -utils/mkgraph.sh data/lang_test exp/sgmm5 exp/sgmm5/graph - -( - steps/decode_sgmm2.sh --nj 13 --cmd "$decode_cmd" --num-threads 5 \ - --config conf/decode.config --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_dev \ - exp/sgmm5/graph data/dev exp/sgmm5/decode_dev -)& - -steps/align_sgmm2.sh \ - --nj 32 --cmd "$train_cmd" --transform-dir exp/tri5a_ali \ - --use-graphs true --use-gselect true \ - data/train data/lang exp/sgmm5 exp/sgmm5_ali - -steps/make_denlats_sgmm2.sh \ - --nj 32 --sub-split 32 --num-threads 4 \ - --beam 10.0 --lattice-beam 6 --cmd "$decode_cmd" --transform-dir exp/tri5a_ali \ - data/train data/lang exp/sgmm5_ali exp/sgmm5_denlats - -steps/train_mmi_sgmm2.sh \ - --cmd "$train_cmd" --drop-frames true --transform-dir exp/tri5a_ali --boost 0.1 \ - data/train data/lang exp/sgmm5_ali exp/sgmm5_denlats \ - exp/sgmm5_mmi_b0.1 - -( -utils/mkgraph.sh data/lang_test exp/tri5a exp/tri5a/graph -steps/decode_fmllr_extra.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " -pe smp 4" \ - --config conf/decode.config --scoring-opts "--min-lmwt 8 --max-lmwt 12"\ - exp/tri5a/graph data/dev exp/tri5a/decode_dev -utils/mkgraph.sh data/lang_test exp/sgmm5 exp/sgmm5/graph -steps/decode_sgmm2.sh --nj 13 --cmd "$decode_cmd" --num-threads 5 \ - --config conf/decode.config --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_dev \ - exp/sgmm5/graph data/dev exp/sgmm5/decode_dev -for iter in 1 2 3 4; do - decode=exp/sgmm5_mmi_b0.1/decode_dev_it$iter - mkdir -p $decode - steps/decode_sgmm2_rescore.sh \ - --cmd "$decode_cmd" --iter $iter --transform-dir 
exp/tri5a/decode_dev \ - data/lang_test data/dev/ exp/sgmm5/decode_dev $decode -done -) & -fi - -wait; - -if [ $stage -le 6 ]; then - local/chain/run_tdnn_1g.sh --stage 0 --gigaword-workdir $rnnlm_workdir || exit 1; -fi -exit 0; diff --git a/egs/fisher_callhome_spanish/s5_gigaword/steps b/egs/fisher_callhome_spanish/s5_gigaword/steps deleted file mode 120000 index 1b186770dd1..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/steps +++ /dev/null @@ -1 +0,0 @@ -../../wsj/s5/steps/ \ No newline at end of file diff --git a/egs/fisher_callhome_spanish/s5_gigaword/utils b/egs/fisher_callhome_spanish/s5_gigaword/utils deleted file mode 120000 index a3279dc8679..00000000000 --- a/egs/fisher_callhome_spanish/s5_gigaword/utils +++ /dev/null @@ -1 +0,0 @@ -../../wsj/s5/utils/ \ No newline at end of file From f810119b7a0f93f9aa3b3d2d387cd113248fafa1 Mon Sep 17 00:00:00 2001 From: saikiranvalluri Date: Tue, 2 Apr 2019 10:48:01 -0400 Subject: [PATCH 122/235] Small cleanup for scripts format --- egs/fisher_callhome_spanish/s5/cmd.sh | 4 ++-- egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh | 6 +++--- egs/fisher_callhome_spanish/s5/steps | 2 +- egs/fisher_callhome_spanish/s5/utils | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/egs/fisher_callhome_spanish/s5/cmd.sh b/egs/fisher_callhome_spanish/s5/cmd.sh index db97f1fbc6f..88db78823a5 100755 --- a/egs/fisher_callhome_spanish/s5/cmd.sh +++ b/egs/fisher_callhome_spanish/s5/cmd.sh @@ -10,6 +10,6 @@ # conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, # or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. -export train_cmd="retry.pl queue.pl --mem 8G" -export decode_cmd="retry.pl queue.pl --mem 8G" +export train_cmd="queue.pl --mem 4G" +export decode_cmd="queue.pl --mem 4G" export mkgraph_cmd="queue.pl --mem 8G" diff --git a/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh b/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh index 2f478419a18..9e9e6efe7df 100755 --- a/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh +++ b/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh @@ -30,7 +30,7 @@ reporting_email= gigaword_workdir= # LSTM/chain options -train_stage=-20 +train_stage=-10 xent_regularize=0.1 dropout_schedule='0,0@0.20,0.3@0.50,0' @@ -157,7 +157,7 @@ if [ $stage -le 19 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) tdnn_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim-continuous=true" tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" @@ -202,7 +202,7 @@ fi if [ $stage -le 20 ]; then - if [[ $(hostname -f) == *.clsp.joujhu.edu ]] && [ ! -d $dir/egs/storage ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then utils/create_split_dir.pl \ /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage fi diff --git a/egs/fisher_callhome_spanish/s5/steps b/egs/fisher_callhome_spanish/s5/steps index 1b186770dd1..6e99bf5b5ad 120000 --- a/egs/fisher_callhome_spanish/s5/steps +++ b/egs/fisher_callhome_spanish/s5/steps @@ -1 +1 @@ -../../wsj/s5/steps/ \ No newline at end of file +../../wsj/s5/steps \ No newline at end of file diff --git a/egs/fisher_callhome_spanish/s5/utils b/egs/fisher_callhome_spanish/s5/utils index a3279dc8679..b240885218f 120000 --- a/egs/fisher_callhome_spanish/s5/utils +++ b/egs/fisher_callhome_spanish/s5/utils @@ -1 +1 @@ -../../wsj/s5/utils/ \ No newline at end of file +../../wsj/s5/utils \ No newline at end of file From ffbe16bc67951a040f1fb5ff72ba78d1e23cc0a0 Mon Sep 17 00:00:00 2001 From: jyhnnhyj <48015613+jyhnnhyj@users.noreply.github.com> Date: Wed, 3 Apr 2019 17:55:10 +0200 Subject: [PATCH 123/235] [egs] Update Tedlium s5_r3 example with more up-to-date chain TDNN configuration --- .../s5_r3/local/chain/compare_wer_general.sh | 4 +- egs/tedlium/s5_r3/local/chain/run_tdnn.sh | 1 + egs/tedlium/s5_r3/local/chain/run_tdnnf.sh | 1 - .../s5_r3/local/chain/tuning/run_tdnn_1c.sh | 249 ++++++++++++++++++ 4 files changed, 252 insertions(+), 3 deletions(-) create mode 120000 egs/tedlium/s5_r3/local/chain/run_tdnn.sh delete mode 120000 egs/tedlium/s5_r3/local/chain/run_tdnnf.sh create mode 100755 egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1c.sh diff --git a/egs/tedlium/s5_r3/local/chain/compare_wer_general.sh b/egs/tedlium/s5_r3/local/chain/compare_wer_general.sh index 88dde1ff0e2..c709e351e1e 100755 --- a/egs/tedlium/s5_r3/local/chain/compare_wer_general.sh +++ b/egs/tedlium/s5_r3/local/chain/compare_wer_general.sh @@ -55,7 +55,7 @@ for n in 0 1 2 3; do for x in $*; do set_names $x # sets $dirname and $epoch_infix decode_names=(dev${epoch_infix} dev${epoch_infix}_rescore test${epoch_infix} test${epoch_infix}_rescore) - wer=$(grep Sum $dirname/decode_${decode_names[$n]}/score*/*ys | utils/best_wer.sh | awk '{print $2}') + wer=$(grep WER $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') printf "% 10s" $wer done echo @@ -64,7 +64,7 @@ for n in 0 1 2 3; do for x in $*; do set_names $x # sets $dirname and $epoch_infix decode_names=(dev${epoch_infix} dev${epoch_infix}_rescore test${epoch_infix} test${epoch_infix}_rescore) - wer=$(grep Sum $dirname/decode_looped_${decode_names[$n]}/score*/*ys | utils/best_wer.sh | awk '{print $2}') + wer=$(grep WER $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') printf "% 10s" $wer done echo diff --git a/egs/tedlium/s5_r3/local/chain/run_tdnn.sh b/egs/tedlium/s5_r3/local/chain/run_tdnn.sh new file mode 120000 index 00000000000..d48449e28bd --- /dev/null +++ b/egs/tedlium/s5_r3/local/chain/run_tdnn.sh @@ -0,0 +1 @@ +tuning/run_tdnn_1c.sh \ No newline at end of file diff --git a/egs/tedlium/s5_r3/local/chain/run_tdnnf.sh b/egs/tedlium/s5_r3/local/chain/run_tdnnf.sh deleted file mode 120000 index 61f8f499182..00000000000 --- a/egs/tedlium/s5_r3/local/chain/run_tdnnf.sh +++ /dev/null @@ -1 +0,0 @@ -tuning/run_tdnn_1b.sh \ No newline at end of file diff --git a/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1c.sh b/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1c.sh new file mode 100755 index 00000000000..faac365af54 --- /dev/null +++ b/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1c.sh @@ -0,0 +1,249 @@ +#!/bin/bash 
+ +# This is copied from tedlium/s5_r2/local/chain/tuning/run_tdnn_1g.sh setup, and it replaces the current run_tdnn_1b.sh script. + +# local/chain/compare_wer_general.sh exp/chain_cleaned/tdnnf_1b exp/chain_cleaned/tdnnf_1c +# System tdnnf_1b tdnnf_1c +# WER on dev(orig) 8.15 8.03 +# WER on dev(rescored) 7.69 7.44 +# WER on test(orig) 8.19 8.30 +# WER on test(rescored) 7.77 7.85 +# Final train prob -0.0692 -0.0669 +# Final valid prob -0.0954 -0.0838 +# Final train prob (xent) -0.9369 -0.9596 +# Final valid prob (xent) -1.0730 -1.0780 +# Num-params 25741728 9463968 + + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnnf_1b/ +# exp/chain_cleaned/tdnnf_1b/: num-iters=945 nj=2..6 num-params=25.7M dim=40+100->3664 combine=-0.074->-0.071 (over 6) xent:train/valid[628,944,final]=(-1.07,-0.959,-0.937/-1.20,-1.10,-1.07) logprob:train/valid[628,944,final]=(-0.088,-0.070,-0.069/-0.111,-0.098,-0.095) +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnnf_1c +# exp/chain_cleaned/tdnn1c/: num-iters=228 nj=3..12 num-params=9.5M dim=40+100->3664 combine=-0.068->-0.068 (over 4) xent:train/valid[151,227,final]=(-1.15,-0.967,-0.960/-1.25,-1.09,-1.08) logprob:train/valid[151,227,final]=(-0.090,-0.068,-0.067/-0.102,-0.05,-0.084) + +## how you run this (note: this assumes that the run_tdnn.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn.sh + +# without cleanup: +# local/chain/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" & + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=15 +decode_nj=15 +xent_regularize=0.1 +dropout_schedule='0,0@0.20,0.5@0.50,0' + +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=1 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=1c #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. +remove_egs=true + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + affine_opts="l2-regularize=0.008 dropout-proportion=0.0 dropout-per-dim-continuous=true" + tdnnf_opts="l2-regularize=0.008 dropout-proportion=0.0 bypass-scale=0.66" + linear_opts="l2-regularize=0.008 orthonormal-constraint=-1.0" + prefinal_opts="l2-regularize=0.008" + output_opts="l2-regularize=0.002" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=1024 + tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=0 + tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + linear-component name=prefinal-l dim=256 $linear_opts + + prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1024 small-dim=256 + output-layer name=output include-log-softmax=false dim=$num_targets $output_opts + + prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1024 small-dim=256 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.0 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.add-option="--optimization.memory-compression-level=2" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0 --constrained false" \ + --egs.chunk-width 150,110,100 \ + --trainer.num-chunk-per-minibatch 64 \ + --trainer.frames-per-iter 5000000 \ + --trainer.num-epochs 6 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.00025 \ + --trainer.optimization.final-effective-lrate 0.000025 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs $remove_egs \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 From b180707611e31568082cc6d1fc669a534acff783 Mon Sep 17 00:00:00 2001 From: Lucas Jo Date: Thu, 4 Apr 2019 01:18:53 +0900 Subject: [PATCH 124/235] [scripts] Fix bug in extend_lang.sh causing validation failure w/ extra_disambig.txt (#3202) --- egs/wsj/s5/utils/lang/extend_lang.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/egs/wsj/s5/utils/lang/extend_lang.sh b/egs/wsj/s5/utils/lang/extend_lang.sh index c8f680a12fb..236e3ad6dd5 100755 --- a/egs/wsj/s5/utils/lang/extend_lang.sh +++ b/egs/wsj/s5/utils/lang/extend_lang.sh @@ -134,6 +134,11 @@ highest_number=$(tail -n 1 $srcdir/phones.txt | awk '{print $2}') awk -v start=$highest_number '{print $1, NR+start}' <$tmpdir/extra_disambig.txt >>$dir/phones.txt echo "$0: added $(wc -l <$tmpdir/extra_disambig.txt) extra disambiguation symbols to phones.txt" +# add extra_disambig symbols into disambig.txt +cat $tmpdir/extra_disambig.txt >> $dir/phones/disambig.txt +utils/sym2int.pl $dir/phones.txt <$dir/phones/disambig.txt >$dir/phones/disambig.int +utils/sym2int.pl $dir/phones.txt <$dir/phones/disambig.txt | \ + awk '{printf(":%d", $1);} END{printf "\n"}' | sed s/:// > $dir/phones/disambig.csl silphone=`cat $srcdir/phones/optional_silence.txt` || exit 1; [ -z "$silphone" ] && \ From 
7093dfa4b1837ab7b7d5c3edd82bbd89f0c45ecb Mon Sep 17 00:00:00 2001 From: armusc <46787089+armusc@users.noreply.github.com> Date: Thu, 4 Apr 2019 19:31:09 +0200 Subject: [PATCH 125/235] [scripts] Bug-fix in make_lexicon_fst.py, which failed when --sil-prob=0 (#3206) --- egs/wsj/s5/utils/lang/make_lexicon_fst.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/wsj/s5/utils/lang/make_lexicon_fst.py b/egs/wsj/s5/utils/lang/make_lexicon_fst.py index 790af2f2314..e22222db340 100755 --- a/egs/wsj/s5/utils/lang/make_lexicon_fst.py +++ b/egs/wsj/s5/utils/lang/make_lexicon_fst.py @@ -209,7 +209,7 @@ def write_fst_no_silence(lexicon, nonterminals=None, left_context_phones=None): if nonterminals is not None: next_state = write_nonterminal_arcs( - start_state, loop_state, next_state, + loop_state, loop_state, next_state, nonterminals, left_context_phones) print("{state}\t{final_cost}".format( From 6f0a3a23ffa74a8e129e1f6bb5f78d75ed669ea9 Mon Sep 17 00:00:00 2001 From: Shujian2015 Date: Thu, 4 Apr 2019 15:18:00 -0400 Subject: [PATCH 126/235] [egs] Fix very small typo in run_tdnn_1b.sh (#3207) --- egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1b.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1b.sh b/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1b.sh index f06ba3fa195..744c964db2f 100755 --- a/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1b.sh @@ -55,7 +55,7 @@ nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned # are just hardcoded at this level, in the commands below. train_stage=-10 tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. -tdnnf_affix=_1a #affix for TDNNF directory, e.g. "a" or "b", in case we change the configuration. +tdnnf_affix=_1b #affix for TDNNF directory, e.g. "a" or "b", in case we change the configuration. common_egs_dir= # you can set this to use previously dumped egs. # End configuration section. From ddeac9837a8f1e390a75948d0f34931a8f145c77 Mon Sep 17 00:00:00 2001 From: "Patrick L. 
Lange" Date: Thu, 4 Apr 2019 12:25:46 -0700 Subject: [PATCH 127/235] [build] Tensorflow version update (#3204) --- src/tfrnnlm/Makefile | 8 +++++--- src/tfrnnlmbin/Makefile | 10 ++++++---- tools/extras/install_tensorflow_cc.sh | 9 +++++---- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/src/tfrnnlm/Makefile b/src/tfrnnlm/Makefile index db2b840b959..3dc8d584210 100644 --- a/src/tfrnnlm/Makefile +++ b/src/tfrnnlm/Makefile @@ -16,11 +16,13 @@ TENSORFLOW = ../../tools/tensorflow all: -EXTRA_CXXFLAGS = -Wno-sign-compare -I$(TENSORFLOW)/bazel-tensorflow/external/protobuf/src \ +EXTRA_CXXFLAGS = -Wno-sign-compare \ + -I$(TENSORFLOW)/bazel-tensorflow/external/protobuf_archive/src \ -I$(TENSORFLOW)/bazel-genfiles -I$(TENSORFLOW) \ -I$(TENSORFLOW)/tensorflow/contrib/makefile/downloads/eigen \ -I$(TENSORFLOW)/tensorflow/contrib/makefile/downloads/nsync/public \ - -I$(TENSORFLOW)/tensorflow/contrib/makefile/downloads/protobuf/src + -I$(TENSORFLOW)/tensorflow/contrib/makefile/downloads/protobuf/src \ + -I${TENSORFLOW}/tensorflow/contrib/makefile/downloads/absl OBJFILES = tensorflow-rnnlm.o @@ -29,7 +31,7 @@ TESTFILES = LIBNAME = kaldi-tensorflow-rnnlm ADDLIBS = ../lm/kaldi-lm.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a \ - ../base/kaldi-base.a + ../base/kaldi-base.a LDLIBS += -lz -ldl -fPIC -lrt LDLIBS += -L$(TENSORFLOW)/bazel-bin/tensorflow -ltensorflow_cc -ltensorflow_framework diff --git a/src/tfrnnlmbin/Makefile b/src/tfrnnlmbin/Makefile index 4beeeb0d594..6963c0b62d0 100644 --- a/src/tfrnnlmbin/Makefile +++ b/src/tfrnnlmbin/Makefile @@ -14,11 +14,13 @@ TENSORFLOW = $(shell pwd)/../../tools/tensorflow all: -EXTRA_CXXFLAGS = -Wno-sign-compare -I$(TENSORFLOW)/bazel-tensorflow/external/protobuf/src \ +EXTRA_CXXFLAGS = -Wno-sign-compare \ + -I$(TENSORFLOW)/bazel-tensorflow/external/protobuf_archive/src -I$(TENSORFLOW)/bazel-genfiles -I$(TENSORFLOW) \ -I$(TENSORFLOW)/tensorflow/contrib/makefile/downloads/eigen \ -I$(TENSORFLOW)/tensorflow/contrib/makefile/downloads/nsync/public \ - -I$(TENSORFLOW)/tensorflow/contrib/makefile/downloads/protobuf/src + -I$(TENSORFLOW)/tensorflow/contrib/makefile/downloads/protobuf/src \ + -I${TENSORFLOW}/tensorflow/contrib/makefile/downloads/absl include ../kaldi.mk BINFILES = lattice-lmrescore-tf-rnnlm lattice-lmrescore-tf-rnnlm-pruned @@ -30,11 +32,11 @@ TESTFILES = ADDLIBS = ../lat/kaldi-lat.a ../lm/kaldi-lm.a ../fstext/kaldi-fstext.a \ ../hmm/kaldi-hmm.a ../tree/kaldi-tree.a ../util/kaldi-util.a \ ../matrix/kaldi-matrix.a ../base/kaldi-base.a \ - ../tfrnnlm/kaldi-tensorflow-rnnlm.a + ../tfrnnlm/kaldi-tensorflow-rnnlm.a LDLIBS += -lz -ldl -fPIC -lrt LDLIBS += -L$(TENSORFLOW)/bazel-bin/tensorflow -ltensorflow_cc -ltensorflow_framework -LDFLAGS += -Wl,-rpath=$(shell pwd)/../../tools/tensorflow/bazel-bin/tensorflow/ +LDFLAGS += -Wl,-rpath,$(TENSORFLOW)/bazel-bin/tensorflow/ include ../makefiles/default_rules.mk diff --git a/tools/extras/install_tensorflow_cc.sh b/tools/extras/install_tensorflow_cc.sh index 95e81053e74..b13fcbeff44 100755 --- a/tools/extras/install_tensorflow_cc.sh +++ b/tools/extras/install_tensorflow_cc.sh @@ -25,7 +25,7 @@ else fi -[ ! -f bazel.zip ] && wget https://github.com/bazelbuild/bazel/releases/download/0.5.4/bazel-0.5.4-dist.zip -O bazel.zip +[ ! 
-f bazel.zip ] && wget https://github.com/bazelbuild/bazel/releases/download/0.15.0/bazel-0.15.0-dist.zip -O bazel.zip mkdir -p bazel cd bazel unzip ../bazel.zip @@ -33,12 +33,13 @@ unzip ../bazel.zip cd ../ # now bazel is built -git clone https://github.com/tensorflow/tensorflow +[ ! -d tensorflow ] && git clone https://github.com/tensorflow/tensorflow cd tensorflow -git checkout r1.4 +git fetch --tags +git checkout r1.12 ./configure -tensorflow/contrib/makefile/download_dependencies.sh +tensorflow/contrib/makefile/download_dependencies.sh bazel build -c opt //tensorflow:libtensorflow.so bazel build -c opt //tensorflow:libtensorflow_cc.so From dc8a56e5bacbfbbee7573f00bbceed78398858c4 Mon Sep 17 00:00:00 2001 From: saikiranvalluri Date: Fri, 5 Apr 2019 06:57:03 -0400 Subject: [PATCH 128/235] Cosmetic fix --- egs/fisher_callhome_spanish/s5/run.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/egs/fisher_callhome_spanish/s5/run.sh b/egs/fisher_callhome_spanish/s5/run.sh index 95425c29034..17ef6313e5e 100755 --- a/egs/fisher_callhome_spanish/s5/run.sh +++ b/egs/fisher_callhome_spanish/s5/run.sh @@ -80,17 +80,18 @@ if [ $stage -le -1 ]; then fi -if $start_textcleanup; then - echo "WARNING : Starting from cleaning up and normalizing the Gigword text" - echo " This might take few days........... You can opt out this stage " - echo " by setting start_textcleanup=false, and having text_lm ready inside rnnlm_workdir." - - if [ $stage -le 0 ]; then +if [ $stage -le 0 ]; then + if $start_textcleanup; then + echo "WARNING : Starting from cleaning up and normalizing the Gigword text" + echo " This might take few days........... You can skip out this stage " + echo " by setting start_textcleanup=false, and having normalised_gigaword_corpus/text_normalized ready inside $rnnlm_workdir." + mkdir -p "$rnnlm_workdir"/gigaword_rawtext local/flatten_gigaword/flatten_all_gigaword.sh "$gigaword_datapath" "$rnnlm_workdir"/flattened_gigaword_corpus 24 cat "$rnnlm_workdir"/flattened_gigaword_corpus/*.flat > "$rnnlm_workdir"/gigaword_rawtext/in.txt local/clean_txt_dir.sh "$rnnlm_workdir"/gigaword_rawtext/ \ "$rnnlm_workdir"/normalised_gigaword_corpus/ + fi mkdir -p "$rnnlm_workdir"/text_lm cut -d " " -f 2- data/train/text > "$rnnlm_workdir"/text_lm/train.txt cut -d " " -f 2- data/dev2/text > "$rnnlm_workdir"/text_lm/dev.txt # For RNNLM and POCOLM training we use dev2/text as dev file. 
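# For illustration: the `cut -d " " -f 2-` calls above strip the leading
# utterance IDs from Kaldi 'text' files, leaving only the word sequences that
# the RNNLM/pocolm training text actually needs. A minimal sketch, with a
# hypothetical line of data/train/text:
#
#   $ head -n 1 data/train/text
#   20051028_180633_356_fsp-A-000123-000456 buenos dias como esta
#   $ cut -d " " -f 2- data/train/text | head -n 1
#   buenos dias como esta
#
# i.e. field 1 (the utterance ID) is dropped and fields 2 onward are kept.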
@@ -98,7 +99,6 @@ if $start_textcleanup; then if $addtraintext; then cat "$rnnlm_workdir"/text_lm/train.txt >> "$rnnlm_workdir"/text_lm/spanish_gigaword_normalised.txt fi - fi fi if [ $stage -le 1 ]; then From beb015175e4f3d50c9a33e25951241abafd78f49 Mon Sep 17 00:00:00 2001 From: Shiyin Kang Date: Sun, 7 Apr 2019 00:52:09 +0800 Subject: [PATCH 129/235] [src] Optimizations to CUDA kernels (#3209) --- src/cudamatrix/cu-kernels.cu | 57 ++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/src/cudamatrix/cu-kernels.cu b/src/cudamatrix/cu-kernels.cu index 515412ca398..bc5c32714ef 100644 --- a/src/cudamatrix/cu-kernels.cu +++ b/src/cudamatrix/cu-kernels.cu @@ -2487,7 +2487,7 @@ static void _heaviside(Real*y, const Real*x, MatrixDim d, int src_stride) { template __global__ static void _softmax_reduce(Real*y, const Real*x, MatrixDim d, int src_stride) { - __shared__ Real smem[CU1DBLOCK]; + __shared__ Real smem; typedef cub::BlockReduce BlockReduceT; __shared__ typename BlockReduceT::TempStorage temp_storage; const int i = blockIdx.x; @@ -2502,13 +2502,13 @@ static void _softmax_reduce(Real*y, const Real*x, MatrixDim d, int src_stride) { tmax = fmax(tmax, x[x_start + j]); } tmax = BlockReduceT(temp_storage).Reduce(tmax, cub::Max()); - if (tid == 0) { - smem[0] = tmax; - } // broadcast max to all threads + if (tid == 0) { + smem = tmax; + } __syncthreads(); - Real max = smem[0]; + Real max = smem; // sum_j(exp(x(i,j)-max)) // reduce to CU1DBLOCK elements per row. @@ -2517,13 +2517,13 @@ static void _softmax_reduce(Real*y, const Real*x, MatrixDim d, int src_stride) { tsum += exp(x[x_start + j] - max); } tsum = BlockReduceT(temp_storage).Sum(tsum); - if (tid == 0) { - smem[0] = tsum; - } // broadcast sum to all threads + if (tid == 0) { + smem = tsum; + } __syncthreads(); - Real inv_sum = Real(1) / smem[0]; + Real inv_sum = Real(1) / smem; // normalize the row for (int j = tid; j < d.cols; j += CU1DBLOCK) { @@ -2565,7 +2565,6 @@ static void _normalize_per_row(Real *y, int y_stride, const Real *x, tsum += x_row[j] * x_row[j]; } tsum = BlockReduceT(temp_storage).Sum(tsum); - __syncthreads(); if (tid == 0) { const Real kSquaredNormFloor = 1.3552527156068805425e-20; // 2^-66 @@ -2680,7 +2679,7 @@ template __global__ static void _log_softmax_reduce(Real* y, const Real* x, MatrixDim y_dim, int x_stride) { - __shared__ Real smem[CU1DBLOCK]; + __shared__ Real smem; typedef cub::BlockReduce BlockReduceT; __shared__ typename BlockReduceT::TempStorage temp_storage; const int i = blockIdx.x; @@ -2695,13 +2694,13 @@ static void _log_softmax_reduce(Real* y, const Real* x, MatrixDim y_dim, tmax = fmax(tmax, x[x_start + j]); } tmax = BlockReduceT(temp_storage).Reduce(tmax, cub::Max()); - if (tid == 0) { - smem[0] = tmax; - } // broadcast max to all threads + if (tid == 0) { + smem = tmax; + } __syncthreads(); - Real max = smem[0]; + Real max = smem; // sum_j(exp(x(i,j)-max)) // reduce to CU1DBLOCK elements per row. 
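// For illustration, a simplified sketch of the pattern these hunks move to
// (not one of the kernels in this file): reduce with cub::BlockReduce, whose
// result is valid only in thread 0, then broadcast it to the whole block
// through a single __shared__ scalar instead of a CU1DBLOCK-sized array.
// CU1DBLOCK and the cub include are assumed from the surrounding file.
//
//   template <typename Real>
//   __global__ static void _row_sum_broadcast(const Real *x, Real *row_sum,
//                                             int num_cols, int stride) {
//     __shared__ Real ssum;                         // one scalar is enough
//     typedef cub::BlockReduce<Real, CU1DBLOCK> BlockReduceT;
//     __shared__ typename BlockReduceT::TempStorage temp_storage;
//     const int i = blockIdx.x;                     // one block per row
//     const int tid = threadIdx.x;
//     Real tsum = 0;
//     for (int j = tid; j < num_cols; j += CU1DBLOCK)
//       tsum += x[i * stride + j];
//     tsum = BlockReduceT(temp_storage).Sum(tsum);  // valid in thread 0 only
//     if (tid == 0) ssum = tsum;
//     __syncthreads();                              // broadcast to all threads
//     if (tid == 0) row_sum[i] = ssum;              // any thread could read ssum now
//   }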
@@ -2710,13 +2709,13 @@ static void _log_softmax_reduce(Real* y, const Real* x, MatrixDim y_dim, tsum += exp(x[x_start + j] - max); } tsum = BlockReduceT(temp_storage).Sum(tsum); - if (tid == 0) { - smem[0] = tsum; - } // broadcast sum to all threads + if (tid == 0) { + smem = tsum; + } __syncthreads(); - Real log_sum = log(smem[0]); + Real log_sum = log(smem); // normalize the row for (int j = tid; j < y_dim.cols; j += CU1DBLOCK) { @@ -2956,7 +2955,7 @@ __global__ static void _diff_softmax(Real* x, const MatrixDim dim, const Real* value, const int value_stride, const Real* diff, const int diff_stride) { - __shared__ Real ssum[CU1DBLOCK]; + __shared__ Real ssum; typedef cub::BlockReduce BlockReduceT; __shared__ typename BlockReduceT::TempStorage temp_storage; @@ -2972,13 +2971,13 @@ static void _diff_softmax(Real* x, const MatrixDim dim, const Real* value, tsum += value[value_start + j] * diff[diff_start + j]; } tsum = BlockReduceT(temp_storage).Sum(tsum); - if (tid == 0) { - ssum[0] = tsum; - } // Broadcast result to all threads + if (tid == 0) { + ssum = tsum; + } __syncthreads(); - const Real pe = ssum[0]; + const Real pe = ssum; // Apply element-wise x = value * (diff - pe) for (int j = tid; j < dim.cols; j += CU1DBLOCK) { @@ -2998,7 +2997,7 @@ static void _diff_log_softmax(const MatrixDim in_deriv_dim, const Real* out_deriv, const int out_deriv_stride, Real* in_deriv) { - __shared__ Real ssum[CU1DBLOCK]; + __shared__ Real ssum; typedef cub::BlockReduce BlockReduceT; __shared__ typename BlockReduceT::TempStorage temp_storage; const int tid = threadIdx.x; @@ -3013,13 +3012,13 @@ static void _diff_log_softmax(const MatrixDim in_deriv_dim, tsum += out_deriv[out_deriv_start + j]; } tsum = BlockReduceT(temp_storage).Sum(tsum); - if (tid == 0) { - ssum[0] = tsum; - } // Broadcast result to all threads + if (tid == 0) { + ssum = tsum; + } __syncthreads(); - const Real sum_e = ssum[0]; + const Real sum_e = ssum; // Apply element-wise x = out_deriv - exp(value) * sum_e for (int j = tid; j < in_deriv_dim.cols; j += CU1DBLOCK) { From a3a190b73d137339e2add27250a1c2bc2e3e43a9 Mon Sep 17 00:00:00 2001 From: Justin Luitjens Date: Sat, 6 Apr 2019 20:19:05 -0600 Subject: [PATCH 130/235] [src] Move curand handle out of CuRand class and into CuDevice. 
(#3196) --- src/cudamatrix/cu-device.cc | 19 ++++++++++++++++++ src/cudamatrix/cu-device.h | 28 +++++++++++++++++++++----- src/cudamatrix/cu-rand.cc | 25 ++++++++++++++--------- src/cudamatrix/cu-rand.h | 40 +++---------------------------------- 4 files changed, 61 insertions(+), 51 deletions(-) diff --git a/src/cudamatrix/cu-device.cc b/src/cudamatrix/cu-device.cc index e5d161521fd..d0708ef486d 100644 --- a/src/cudamatrix/cu-device.cc +++ b/src/cudamatrix/cu-device.cc @@ -123,6 +123,14 @@ void CuDevice::Initialize() { // Initialize the cuSPARSE library CUSPARSE_SAFE_CALL(cusparseCreate(&cusparse_handle_)); CUSPARSE_SAFE_CALL(cusparseSetStream(cusparse_handle_, cudaStreamPerThread)); + + // Initialize the generator, + CURAND_SAFE_CALL(curandCreateGenerator( + &curand_handle_, CURAND_RNG_PSEUDO_DEFAULT)); + // To get same random sequence, call srand() before the constructor is invoked, + CURAND_SAFE_CALL(curandSetGeneratorOrdering( + curand_handle_, CURAND_ORDERING_PSEUDO_DEFAULT)); + SeedGpu(); } } @@ -258,6 +266,14 @@ void CuDevice::FinalizeActiveGpu() { // Initialize the cuSPARSE library CUSPARSE_SAFE_CALL(cusparseCreate(&cusparse_handle_)); CUSPARSE_SAFE_CALL(cusparseSetStream(cusparse_handle_, cudaStreamPerThread)); + + // Initialize the generator, + CURAND_SAFE_CALL(curandCreateGenerator( + &curand_handle_, CURAND_RNG_PSEUDO_DEFAULT)); + // To get same random sequence, call srand() before the constructor is invoked, + CURAND_SAFE_CALL(curandSetGeneratorOrdering( + curand_handle_, CURAND_ORDERING_PSEUDO_DEFAULT)); + SeedGpu(); // Notify the user which GPU is being userd. char name[128]; @@ -529,6 +545,9 @@ CuDevice::~CuDevice() { CUBLAS_SAFE_CALL(cublasDestroy(cublas_handle_)); if (cusparse_handle_) CUSPARSE_SAFE_CALL(cusparseDestroy(cusparse_handle_)); + if (curand_handle_) { + CURAND_SAFE_CALL(curandDestroyGenerator(curand_handle_)); + } } diff --git a/src/cudamatrix/cu-device.h b/src/cudamatrix/cu-device.h index 8816f9d223b..7cca69f754b 100644 --- a/src/cudamatrix/cu-device.h +++ b/src/cudamatrix/cu-device.h @@ -26,6 +26,7 @@ #if HAVE_CUDA == 1 #include #include +#include #include #include #include @@ -34,6 +35,7 @@ #include "base/kaldi-common.h" #include "base/timer.h" #include "cudamatrix/cu-allocator.h" +#include "cudamatrix/cu-common.h" namespace kaldi { @@ -80,7 +82,16 @@ class CuDevice { inline cublasHandle_t GetCublasHandle() { return cublas_handle_; } inline cusparseHandle_t GetCusparseHandle() { return cusparse_handle_; } - + inline curandGenerator_t GetCurandHandle() { return curand_handle_; } + + inline void SeedGpu() { + if (CuDevice::Instantiate().Enabled()) { + // To get same random sequence, call srand() before the method is invoked, + CURAND_SAFE_CALL(curandSetPseudoRandomGeneratorSeed( + curand_handle_, RandInt(128, RAND_MAX))); + CURAND_SAFE_CALL(curandSetGeneratorOffset(curand_handle_, 0)); + } + } // We provide functions Malloc(), MallocPitch() and Free() which replace // cudaMalloc(), cudaMallocPitch() and cudaFree(). Their function is to cache // the results of previous allocations to avoid the very large overhead that @@ -291,9 +302,8 @@ class CuDevice { int32 device_id_copy_; cublasHandle_t cublas_handle_; - cusparseHandle_t cusparse_handle_; - + curandGenerator_t curand_handle_; }; // class CuDevice @@ -308,9 +318,17 @@ class CuTimer: public Timer { // This function is declared as a more convenient way to get the CUDA device handle for use // in the CUBLAS v2 API, since we so frequently need to access it. 
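// For illustration, a sketch of how the curand helpers declared below are
// meant to be used (hypothetical caller, error handling omitted): the
// generator is owned by CuDevice, so callers fetch it rather than creating
// their own, and reproducible sequences are obtained by calling srand() and
// then re-seeding via CuDevice::Instantiate().SeedGpu(). The actual Kaldi
// code in cu-rand.cc goes through type-dispatching wrappers such as
// curandGenerateUniformWrap rather than calling curand directly.
//
//   void FillWithUniformRandom(float *device_data, size_t num_elements) {
//     curandGenerator_t gen = GetCurandHandle();  // shared, per-device generator
//     CURAND_SAFE_CALL(curandGenerateUniform(gen, device_data, num_elements));
//   }
//
//   // To reproduce a random sequence:
//   //   srand(my_seed);
//   //   CuDevice::Instantiate().SeedGpu();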
-inline cublasHandle_t GetCublasHandle() { return CuDevice::Instantiate().GetCublasHandle(); } +inline cublasHandle_t GetCublasHandle() { + return CuDevice::Instantiate().GetCublasHandle(); +} // A more convenient way to get the handle to use cuSPARSE APIs. -inline cusparseHandle_t GetCusparseHandle() { return CuDevice::Instantiate().GetCusparseHandle(); } +inline cusparseHandle_t GetCusparseHandle() { + return CuDevice::Instantiate().GetCusparseHandle(); +} + +inline curandGenerator_t GetCurandHandle() { + return CuDevice::Instantiate().GetCurandHandle(); +} } // namespace kaldi diff --git a/src/cudamatrix/cu-rand.cc b/src/cudamatrix/cu-rand.cc index 20439834a98..63d858c25e9 100644 --- a/src/cudamatrix/cu-rand.cc +++ b/src/cudamatrix/cu-rand.cc @@ -69,7 +69,8 @@ void CuRand::RandUniform(CuMatrixBase *tgt) { CuMatrix tmp(tgt->NumRows(), tgt->NumCols(), kUndefined, kStrideEqualNumCols); size_t s = static_cast(tmp.NumRows()) * static_cast(tmp.Stride()); - CURAND_SAFE_CALL(curandGenerateUniformWrap(gen_, tmp.Data(), s)); + CURAND_SAFE_CALL(curandGenerateUniformWrap( + GetCurandHandle(), tmp.Data(), s)); tgt->CopyFromMat(tmp); CuDevice::Instantiate().AccuProfile(__func__, tim); } else @@ -86,7 +87,8 @@ void CuRand::RandUniform(CuMatrix *tgt) { CuTimer tim; // Here we don't need to use 'tmp' matrix, size_t s = static_cast(tgt->NumRows()) * static_cast(tgt->Stride()); - CURAND_SAFE_CALL(curandGenerateUniformWrap(gen_, tgt->Data(), s)); + CURAND_SAFE_CALL(curandGenerateUniformWrap( + GetCurandHandle(), tgt->Data(), s)); CuDevice::Instantiate().AccuProfile(__func__, tim); } else #endif @@ -100,7 +102,8 @@ void CuRand::RandUniform(CuVectorBase *tgt) { #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { CuTimer tim; - CURAND_SAFE_CALL(curandGenerateUniformWrap(gen_, tgt->Data(), tgt->Dim())); + CURAND_SAFE_CALL(curandGenerateUniformWrap( + GetCurandHandle(), tgt->Data(), tgt->Dim())); CuDevice::Instantiate().AccuProfile(__func__, tim); } else #endif @@ -125,7 +128,8 @@ void CuRand::RandGaussian(CuMatrixBase *tgt) { MatrixIndexT num_cols_even = tgt->NumCols() + (tgt->NumCols() % 2); // + 0 or 1, CuMatrix tmp(tgt->NumRows(), num_cols_even, kUndefined, kStrideEqualNumCols); - CURAND_SAFE_CALL(curandGenerateNormalWrap(gen_, tmp.Data(), tmp.NumRows()*tmp.Stride())); + CURAND_SAFE_CALL(curandGenerateNormalWrap( + GetCurandHandle(), tmp.Data(), tmp.NumRows()*tmp.Stride())); tgt->CopyFromMat(tmp.ColRange(0,tgt->NumCols())); CuDevice::Instantiate().AccuProfile(__func__, tim); } else @@ -143,7 +147,8 @@ void CuRand::RandGaussian(CuMatrix *tgt) { // Here we don't need to use 'tmp' matrix, if the number of elements is even, MatrixIndexT num_elements = tgt->NumRows() * tgt->Stride(); if (0 == (num_elements % 2)) { - CURAND_SAFE_CALL(curandGenerateNormalWrap(gen_, tgt->Data(), num_elements)); + CURAND_SAFE_CALL(curandGenerateNormalWrap( + GetCurandHandle(), tgt->Data(), num_elements)); } else { // We use 'tmp' matrix with one column added, this guarantees an even // number of elements. 
Use the option kStrideEqualNumCols to ensure @@ -152,8 +157,8 @@ void CuRand::RandGaussian(CuMatrix *tgt) { MatrixIndexT num_cols_even = tgt->NumCols() + (tgt->NumCols() % 2); // + 0 or 1, CuMatrix tmp(tgt->NumRows(), num_cols_even, kUndefined, kStrideEqualNumCols); - CURAND_SAFE_CALL(curandGenerateNormalWrap(gen_, tmp.Data(), - tmp.NumRows() * tmp.Stride())); + CURAND_SAFE_CALL(curandGenerateNormalWrap( + GetCurandHandle(), tmp.Data(), tmp.NumRows() * tmp.Stride())); tgt->CopyFromMat(tmp.ColRange(0,tgt->NumCols())); } CuDevice::Instantiate().AccuProfile(__func__, tim); @@ -174,11 +179,13 @@ void CuRand::RandGaussian(CuVectorBase *tgt) { // curandGenerateUniform(), curandGenerateUniformDouble(). MatrixIndexT num_elements = tgt->Dim(); if (0 == (num_elements % 2)) { - CURAND_SAFE_CALL(curandGenerateNormalWrap(gen_, tgt->Data(), tgt->Dim())); + CURAND_SAFE_CALL(curandGenerateNormalWrap( + GetCurandHandle(), tgt->Data(), tgt->Dim())); } else { MatrixIndexT dim_even = tgt->Dim() + (tgt->Dim() % 2); // + 0 or 1, CuVector tmp(dim_even, kUndefined); - CURAND_SAFE_CALL(curandGenerateNormalWrap(gen_, tmp.Data(), tmp.Dim())); + CURAND_SAFE_CALL(curandGenerateNormalWrap( + GetCurandHandle(), tmp.Data(), tmp.Dim())); tgt->CopyFromVec(tmp.Range(0,tgt->Dim())); } CuDevice::Instantiate().AccuProfile(__func__, tim); diff --git a/src/cudamatrix/cu-rand.h b/src/cudamatrix/cu-rand.h index fafc747df8d..6e0be648270 100644 --- a/src/cudamatrix/cu-rand.h +++ b/src/cudamatrix/cu-rand.h @@ -20,10 +20,7 @@ #ifndef KALDI_CUDAMATRIX_CU_RAND_H_ #define KALDI_CUDAMATRIX_CU_RAND_H_ -#if HAVE_CUDA == 1 - #include -#endif - +#include "cudamatrix/cu-device.h" #include "cudamatrix/cu-matrix.h" #include "cudamatrix/cu-vector.h" #include "base/kaldi-math.h" @@ -33,36 +30,10 @@ namespace kaldi { template class CuRand { public: - CuRand() { - #if HAVE_CUDA == 1 - if (CuDevice::Instantiate().Enabled()) { - // Initialize the generator, - CURAND_SAFE_CALL(curandCreateGenerator(&gen_, CURAND_RNG_PSEUDO_DEFAULT)); - // To get same random sequence, call srand() before the constructor is invoked, - CURAND_SAFE_CALL(curandSetGeneratorOrdering(gen_, CURAND_ORDERING_PSEUDO_DEFAULT)); - CURAND_SAFE_CALL(curandSetPseudoRandomGeneratorSeed(gen_, RandInt(128, RAND_MAX))); - CURAND_SAFE_CALL(curandSetGeneratorOffset(gen_, 0)); - } - #endif - } - ~CuRand() { + void SeedGpu() { #if HAVE_CUDA == 1 - if (CuDevice::Instantiate().Enabled()) { - // Release the generator, - CURAND_SAFE_CALL(curandDestroyGenerator(gen_)); - } - #endif - } - - /// Generate new seed for the GPU, - void SeedGpu() { - #if HAVE_CUDA == 1 - if (CuDevice::Instantiate().Enabled()) { - // To get same random sequence, call srand() before the method is invoked, - CURAND_SAFE_CALL(curandSetPseudoRandomGeneratorSeed(gen_, RandInt(128, RAND_MAX))); - CURAND_SAFE_CALL(curandSetGeneratorOffset(gen_, 0)); - } + CuDevice::Instantiate().SeedGpu(); #endif } @@ -88,11 +59,6 @@ class CuRand { void BinarizeProbs(const CuMatrix &probs, CuMatrix *states); /// add gaussian noise to each element, void AddGaussNoise(CuMatrix *tgt, Real gscale = 1.0); - - private: - #if HAVE_CUDA == 1 - curandGenerator_t gen_; - #endif }; } // namsepace From faa7ff891026816c24d9247e38ab67e566a59379 Mon Sep 17 00:00:00 2001 From: "kkm (aka Kirill Katsnelson)" Date: Sat, 6 Apr 2019 19:20:26 -0700 Subject: [PATCH 131/235] [build] Make MKL the default BLAS library, add installation scripts (#3194) --- src/configure | 4 +- src/doc/build_setup.dox | 10 +- src/doc/matrixwrap.dox | 235 +++++++++++++++---------- 
tools/extras/check_dependencies.sh | 180 ++++++++++---------- tools/extras/install_mkl.sh | 265 +++++++++++++++++++++++++++++ 5 files changed, 504 insertions(+), 190 deletions(-) create mode 100755 tools/extras/install_mkl.sh diff --git a/src/configure b/src/configure index 1013a3c162e..04c33236437 100755 --- a/src/configure +++ b/src/configure @@ -809,8 +809,8 @@ threaded_atlas=false mkl_threading=sequential android=false -MATHLIB='ATLAS' -ATLASROOT=`rel2abs ../tools/ATLAS_headers/` +MATHLIB=MKL +MKLROOT=/opt/intel/mkl FSTROOT=`rel2abs ../tools/openfst` CUBROOT=`rel2abs ../tools/cub` diff --git a/src/doc/build_setup.dox b/src/doc/build_setup.dox index 47ff7e033a8..5ea2e212b20 100644 --- a/src/doc/build_setup.dox +++ b/src/doc/build_setup.dox @@ -32,12 +32,12 @@ The build process for Windows is separate from the build process for UNIX-like systems, and is described in windows/INSTALL (tested some time ago with - Windows 7 and Microsoft Visual Studio 10.0). We use scripts to + Windows 7 and Microsoft Visual Studio 2013). We use scripts to create the Visual Studio 10.0 solution file. There are two options for - the math library on Windows: either you can use Cygwin to compile ATLAS, or you - can use the Intel MKL library. Detailed instructions are provided. However, note + the math library on Windows: either Intel MKL, or use Cygwin to compile ATLAS. + Detailed instructions are provided. However, note that the Windows setup is becoming out of date and is not regularly tested, - and not all the code currently compiles on it. + and not all the may compile. \section build_setup_configure How our configure script works (for UNIX variants) @@ -143,6 +143,6 @@ preprocessor variables, setting compile options, linking with libraries, and so We have compiled Kaldi on Windows, Cygwin, various flavors of Linux (including Ubuntu, CentOS, Debian, Red Hat and SUSE), and Darwin. We recommend you use g++ version -4.4 or above, although other compilers such as llvm and Intel's icc are also known to work. +4.7 or above, although other compilers such as llvm and Intel's icc are also known to work. */ diff --git a/src/doc/matrixwrap.dox b/src/doc/matrixwrap.dox index fb595d581fe..9cf5e92ca48 100644 --- a/src/doc/matrixwrap.dox +++ b/src/doc/matrixwrap.dox @@ -22,93 +22,155 @@ namespace kaldi { /** \page matrixwrap External matrix libraries - Here we describe how our \ref matrix "matrix library" makes use of + Here we describe how our \ref matrix "matrix library" makes use of external libraries. \section matrixwrap_summary Overview - - The matrix code in Kaldi is mostly a wrapper on top of the - linear-algebra libraries BLAS and LAPACK. The code has been designed to be as flexible - as possible in terms of what libraries it can use. Currently it supports four options: + + The matrix code in Kaldi is mostly a wrapper on top of the linear-algebra + libraries BLAS and LAPACK. The code has been designed to be as flexible as + possible in terms of what libraries it can use. Currently it supports four + options: + - Intel MKL, which provides both BLAS and LAPACK (the default) + - OpenBLAS, which provides BLAS and LAPACK - ATLAS, which is an implementation of BLAS plus a subset of LAPACK (with a different interface) - Some implementation of BLAS plus CLAPACK (note: this has not been tested recently). 
- - Intel's MKL, which provides both BLAS and LAPACK - - OpenBLAS, which provides BLAS and LAPACK - The code has to "know" which of these four options is being used, because although in principle - BLAS and LAPACK are standardized, there are some differences in the interfaces. - The Kaldi code requires exactly one - of the three strings HAVE_ATLAS, HAVE_CLAPACK, HAVE_OPENBLAS or HAVE_MKL to be defined - (e.g. using -DHAVE_ATLAS as an option to the compiler). It must then be - linked with the appropriate libraries. The code that deals most directly - with including the external libraries and setting up the appropriate - typedef's and defines, is in \ref kaldi-blas.h. However, the rest of - the matrix code is not completely insulated from these issues because the ATLAS - and CLAPACK versions of higher-level routines are called differently (so - we have a lot of "#ifdef HAVE_ATLAS" directives and the like). Additionally, some routines - are not even available in ATLAS so we have had to implement them ourselves. - - The "configure" script in the "src" directory is responsible for setting up Kaldi to use the libraries. - It does this by creating the file "kaldi.mk" in the "src" directory, which gives appropriate flags - to the compiler. If called with no arguments it will use any ATLAS installation it can find in "normal" places - in your system, but it is quite configurable. See the script itself for usage. - - \section matrixwrap_blas Basic Linear Algebra Subroutines (BLAS) - - Because we refer a lot to BLAS in this section, we briefly explain what it is. - BLAS is a set of subroutine declarations that correspond to low-level - matrix-vector operations. There is Level 1 Blas (vector-vector), Level 2 - (vector-matrix) and Level 3 (matrix-matrix). They have names like daxpy (for - double-precision a*x plus y), and dgemm (for double general matrix-matrix - multiply). BLAS has various actual implementations. The "reference BLAS", - supplied I believe by Netlib (the folks who also brought us the most common version - of LAPACK), is one. ATLAS is another one (but it also implements some functions - from LAPACK). - - \section matrixwrap_lapack Linear Algebra PACKage (LAPACK) - - Lapack is a set of linear-algebra routines, originally written in Fortran. It includes - higher-level routines than BLAS, such as matrix inversion, SVD, etc. - Netlib has implemented this (this is the "normal" LAPACK). LAPACK requires - BLAS. It is possible to mix-and-match LAPACK and BLAS implementations - (e.g. Netlib's LAPACK with ATLAS's BLAS). - - CLAPACK is a version of LAPACK that has been converted from Fortan to C automatically - using the f2c utility. When we talk about using LAPACK, we are actually - talking about using CLAPACK. Because CLAPACK has been converted to C using the - f2c utility, when we link against it we need to include the f2c library (e.g. -lf2c, - or -lg2c if using recent versions of gcc), otherwise we will get linking errors. - - - \section matrixwrap_atlas Automatically Tuned Linear Algebra Software (ATLAS) + The code has to "know" which of these four options is being used, because + although in principle BLAS and LAPACK are standardized, there are some + differences in the interfaces. The Kaldi code requires exactly one of the + three macros \c HAVE_ATLAS, \c HAVE_CLAPACK, \c HAVE_OPENBLAS or \c HAVE_MKL + to be defined (normally using \c -DHAVE_ATLAS as an option to the compiler). + It must then be linked with the appropriate libraries. 
The code that deals + most directly with including the external libraries and setting up the + appropriate typedef's and defines, is in \ref kaldi-blas.h. However, the rest + of the matrix code is not completely insulated from these issues because the + ATLAS and CLAPACK versions of higher-level routines are called differently (so + we have a lot of "#ifdef HAVE_ATLAS" directives and the like). Additionally, + some routines are not even available in ATLAS so we have had to implement them + ourselves. + + The "configure" script in the "src" directory is responsible for setting up + Kaldi to use the libraries. It does this by creating the file "kaldi.mk" in + the "src" directory, which gives appropriate flags to the compiler. If called + with no arguments it will use any Intel MKL installation it can find in + "normal" places in your system, but it is configurable. Run the script with + the \c \--help option for the complete option list. + + \section matrixwrap_matalgebra Understanding BLAS and LAPACK + + Because we refer a lot to BLAS (and more often CBLAS) and LAPACK (or, rarely, + CLAPACK) in this section, we briefly explain what it is. + + \subsection matrixwrap_blas Basic Linear Algebra Subroutines (BLAS) + + BLAS is a set of subroutine declarations that correspond to low-level + matrix-vector operations. There is BLAS Level 1 (vector-vector), Level 2 + (vector-matrix) and Level 3 (matrix-matrix). They have names like \c daxpy + (for \"double-precision \b a \b x plus \b y\"), and \c dgemm + (for "double-precision general matrix-matrix multiply"). BLAS has various + actual implementations. The reference + implementation of BLAS originated back in 1979, and has been maintained + since by Netlib. The reference implementation lacks any optimization + whatsoever, and exists solely as a touchstone to validate the correctness of + other implementations. MKL, ATLAS and OpenBLAS provide optimized + implementations of BLAS. + + CBLAS is just the C language interface to BLAS. + + \subsection matrixwrap_lapack Linear Algebra PACKage (LAPACK) + + LAPACK is a set of linear-algebra routines, originally written in Fortran. It + includes higher-level routines than BLAS, such as matrix inversion, SVD, etc. + The reference implementation of + LAPACK was implemented and has been maintained by Netlib. LAPACK + internally uses BLAS. It is possible to mix-and-match LAPACK and BLAS + implementations (e.g. Netlib's LAPACK with ATLAS's BLAS). + + CLAPACK is a version of LAPACK that has been converted from Fortan to C + automatically using the f2c utility. Because of this, the f2c library is + required during linking with the "original" CLAPACK (usually \c -lg2c or + \c -lf2c). + + MKL provides complete C-callable interfaces for its own BLAS and LAPACK + implementations; no additional libraries are required. + + \section matrixwrap_mkl Intel Math Kernel Library (MKL) + + Intel MKL provides C-language interface to a high-performance implementation + of the BLAS and LAPACK routines, and is currently the preferred CBLAS/CLAPACK + provider for Kaldi. To use MKL with Kaldi use the \c -DHAVE_MKL compiler flag. + + Previously MKL used to be a paid product. Starting 2017, Intel made MKL freely + available and allows royalty-freely runtime redistribution even for commercial + application (although, just like, for example, CUDA, it is still a + closed-source commercial product). + + MKL provides a very highly optimized implementation of linear algebra + routines, and especially on Intel CPUs. 
In fact, the library contains multiple + code paths, which are selected at runtime depending on individual features of + the CPU it is being loaded on. Thus with MKL you will automatically benefit + from all features and instruction sets (such as AVX2 and AVX512) if they are + available on your CPU, without any additional configuration. These + instructions accelerate linear algebra operations on CPU significantly. It is + usually a good idea to use a recent MKL version if your CPU is of a newer + architecture. + + To simplify MKL setup on Linux, we provide a script + \c tools/extras/install_mkl.sh. We install only 64-bit binaries for MKL, but + once the \c install_mkl.sh script completes successfully once, the Intel + repositories are registered on your system, and you can both obtain new + versions and 32-bit libraries using your system's package manager. + + For Mac and Windows, + download the installer from Intel's Web site (registration may be + required). Refer to the same page in case the above Linux script does not + support your Linux distribution. The Intel installers (Mac, Windows) let you + select the 32-bit and 64-bit packages separately. To run Kaldi training + recipes only the 64-bit version is required. + + We have tested Kaldi extensively with 64-bit libraries under Linux and + Windows. + + The + MKL Link Line Advisor is an interactive Web tool that allows configuring + the compiler flags for various systems and compilers, in case our "configure" + script does not cover it. + \n \b NOTE: Do not use the the multithreaded mode for + Kaldi training (select "sequential" as the threading option). Our script and + binary setups are designed to run multiple processes on a single machine, + presumably maxing out its CPU, and an attempt to multi-thread linear algebra + computations will only adversely impact the performance. + + \section matrixwrap_atlas Automatically Tuned Linear Algebra Software (ATLAS) ATLAS is a well known implementation of BLAS plus a subset of LAPACK. The general idea of ATLAS is to tune to the particular processor setup, so the compilation process is quite complex and can take a while. For this reason, - it can be quite tricky to compile ATLAS. On UNIX-based systems, you can't even do it unless you + it can be quite tricky to compile ATLAS. On UNIX-based systems, you can't even do it unless you are root or are friendly with your system administrator, because to compile it you need to turn off CPU throttling; and on Windows, ATLAS does not compile "natively", only in Cygwin. Sometimes it can be a better bet to find libraries that have been compiled by someone else for your particular platform, but we can't offer - much advice on how to do this. ATLAS generally performs better + much advice on how to do this. ATLAS generally performs better than the "reference BLAS" available from Netlib. ATLAS only includes a few LAPACK routines. These include matrix inversion and Cholesky factorization, but not SVD. For this reason we have implemented a couple more of the LAPACK - routines (SVD and eigenvalue decomposition); see + routines (SVD and eigenvalue decomposition); see the next section. - + ATLAS conforms to the BLAS interface, but its interface for the subset of - LAPACK routines that it provides is not the same as Netlib's (it's more - C-like and less FORTRAN-ish). 
For this reason, there are quite a number of #ifdef's in our code - to switch between the calling styles, depending whether we are + LAPACK routines that it provides is not the same as Netlib's (it's more C-like + and less FORTRAN-ish). For this reason, there are quite a number of \#ifdef's + in our code to switch between the calling styles, depending whether we are linking with ATLAS or CLAPACK. - + \subsection matrixwrap_atlas_install_windows Installing ATLAS (on Windows) For instructions on how to install ATLAS on Windows (and note that these instructions require Cygwin), see the file windows/INSTALL.atlas in our source distribution. Note that our Windows setup is not being - actvely maintained at the moment and we don't anticipate that it will work + actively maintained at the moment and we don't anticipate that it will work very cleanly. \subsection matrixwrap_atlas_install_linux Installing ATLAS (on Linux) @@ -118,39 +180,31 @@ namespace kaldi { pre-built binaries available, they may not be the best binaries possible for your architecture so it is probably a better idea to compile from source. The easiest way to do this - is to cd from "src" to "../tools" and to run ./install_atlas.sh. + is to cd from "src" to "../tools" and to run ./install_atlas.sh. If this does not work, the detailed installation - instructions can be found at: http://math-atlas.sourceforge.net/atlas_install/. - + instructions can be found at: http://math-atlas.sourceforge.net/atlas_install/. + One useful note is that before installing ATLAS you should turn off CPU - throttling using "cpufreq-selector -g performance" (cpufreq-selector may be in - sbin), if it is enabled (see the ATLAS install page). You can first try running the - "install_atlas.sh" script before doing this, to see whether it works-- if CPU + throttling using "cpufreq-selector -g performance" (cpufreq-selector may be in + sbin), if it is enabled (see the ATLAS install page). You can first try running the + "install_atlas.sh" script before doing this, to see whether it works-- if CPU throttling is enabled, the ATLAS installation scripts will die with an error. - - \section matrixwrap_mkl Intel Math Kernel Library (MKL) - Intel MKL also provides C-language interface to the BLAS and LAPACK routines, - and can be used with Kaldi by using the -DHAVE_MKL compiler flag. The linker - flags for MKL tend to be quite different depending on the OS, architecture, - compiler, etc. used. We have tested Kaldi on 32-bit Windows and x86_64 (or EMT64) Linux. - Flags for other platforms can be obtained from: - http://software.intel.com/en-us/articles/intel-mkl-link-line-advisor/ \section matrixwrap_openblas OpenBLAS - Kaldi now supports linking against the OpenBLAS library, which is an implementation + Kaldi now supports linking against the OpenBLAS library, which is an implementation of BLAS and parts of LAPACK. OpenBLAS also automatically compiles Netlib's implementation of LAPACK, - so that it can explort LAPACK in its entirety. + so that it can export LAPACK in its entirety. OpenBLAS is a fork of the GotoBLAS project (an assembler-heavy implementation of BLAS) which is no longer being maintained. In order to use GotoBLAS you can cd from "src" to "../tools", type "make openblas", then cd to "../src" and give the correct option to the "configure" script to use OpenBLAS (look at the comments at the top of the configure script to find this option). Thanks to Sola Aina for suggesting this and helping us to get this to work. 
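   As a concrete sketch of that sequence (the exact configure option is an
   assumption here and may differ between Kaldi versions, so check the
   comments at the top of the configure script or "./configure --help"):

     cd ../tools
     make openblas
     cd ../src
     ./configure --mathlib=OPENBLAS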
- + \section matrixwrap_jama Java Matrix Package (JAMA) JAMA is an implementation of linear-algebra routines for Java, written - in collaboration between NIST and MathWorks and put into the public domain + in collaboration between NIST and MathWorks and put into the public domain (see math.nist.gov/javanumerics/jama). We used some of this code to fill in a couple of holes in ATLAS-- specifically, if we're compiling with -DHAVE_ATLAS, we don't have the CLAPACK routines for SVD and eigenvalue @@ -165,7 +219,7 @@ namespace kaldi { directory and see if it succeeds. A lot of compilation issues will manifest themselves as linking errors. In this section we give a summary of some of the more common linking errors (at least, those that relate specifically to the matrix library). - + Depending on the compilation option (-DHAVE_CLAPACK, -DHAVE_LAPACK or -DHAVE_MKL), the code will be expecting to link with different things. When debugging linking errors, bear in mind that the problem could be a mismatch between the compilation @@ -182,7 +236,7 @@ namespace kaldi { s_cat, pow_dd, r_sign, pow_ri, pow_di, s_copy, s_cmp, d_sign \subsection matrix_err_clapack CLAPACK linking errors - + You will get these errors if you compiled with -DHAVE_CLAPACK but did not provide the CLAPACK library. The symbols you will be missing are: @@ -195,15 +249,15 @@ namespace kaldi { but it supplies different symbols. The native CLAPACK version of liblapack has symbols like those above (e.g. sgesvd_, sgetrf_), but the ATLAS version has symbols like clapack_sgetrf and also ones like ATL_sgetrf. - + \subsection matrix_err_blas BLAS linking errors - + You will get these errors if you failed to link against an implementation of BLAS. These errors can also occur if libraries are linked in the wrong order. CLAPACK requires BLAS, so you have to link BLAS after CLAPACK. - + The symbols you will see if you failed to link with BLAS include: - + cblas_sger, cblas_saxpy, cblas_dapy, cblas_ddot, cblas_sdot, cblas_sgemm, cblas_dgemm To fix these, link with a static library like libcblas.a, or do -lcblas (assuming @@ -220,7 +274,7 @@ namespace kaldi { CLAPACK. The cblaswrap library should be invoked before the cblas one. If you are missing cblaswrap, you will see errors about symbols like: - f2c_sgemm, f2c_strsm, f2c_sswap, f2c_scopy, f2c_sspmv, f2c_sdot, f2c_sgemv + f2c_sgemm, f2c_strsm, f2c_sswap, f2c_scopy, f2c_sspmv, f2c_sdot, f2c_sgemv and so on (there are a lot of these symbols). @@ -235,15 +289,15 @@ namespace kaldi { \subsection matrix_err_atl_clapack Missing the ATLAS implementation of (parts of) CLAPACK - These errors can only occur if you compiled wiht the -DHAVE_ATLAS option. + These errors can only occur if you compiled with the -DHAVE_ATLAS option. Atlas's name for the CLAPACK routines are different from clapack's own (they have clapack_ prepended to indicate the origin, which can be quite confusing). If you have undefined references to the following symbols: - + clapack_sgetrf, clapack_sgetri, clapack_dgetrf, clapack_dgetri - then it means you failed to link with an ATLAS library containing these symbols. + then it means you failed to link with an ATLAS library containing these symbols. This may be variously called liblapack.a, libclapack.a or liblapack_atlas.a, but you can tell that it is the right one if it defines a symbol called ATL_cgetrf (type "nm | grep ATL_cgetrf" to see). You may be able to link @@ -254,7 +308,6 @@ namespace kaldi { out is to look inside it using "nm" or "strings". 
- */ } diff --git a/tools/extras/check_dependencies.sh b/tools/extras/check_dependencies.sh index 1b63c4c99d9..0ee7e5b38dc 100755 --- a/tools/extras/check_dependencies.sh +++ b/tools/extras/check_dependencies.sh @@ -10,48 +10,45 @@ debian_packages= opensuse_packages= function add_packages { - redhat_packages="$redhat_packages $1"; - debian_packages="$debian_packages $2"; - opensuse_packages="$opensuse_packages $3"; + redhat_packages="$redhat_packages $1" + debian_packages="$debian_packages ${2:-$1}" + opensuse_packages="$opensuse_packages ${3:-$1}" } -if ! which which >&/dev/null; then - echo "$0: which is not installed." - add_packages which debianutils which -fi +function have { type -t "$1" >/dev/null; } -COMPILER_VER_INFO=$($CXX --version 2>/dev/null) -case $COMPILER_VER_INFO in +compiler_ver_info=$($CXX --version 2>/dev/null) +case $compiler_ver_info in "") - echo "$0: $CXX is not installed." + echo "$0: Compiler '$CXX' is not installed." echo "$0: You need g++ >= 4.8.3, Apple Xcode >= 5.0 or clang >= 3.3." - add_packages gcc-c++ g++ gcc-c++ + add_packages gcc-c++ g++ status=1 ;; "g++ "* ) - GCC_VER=$($CXX -dumpversion) - GCC_VER_NUM=$(echo $GCC_VER | sed 's/\./ /g' | xargs printf "%d%02d%02d") - if [ $GCC_VER_NUM -lt 40803 ]; then - echo "$0: $CXX (g++-$GCC_VER) is not supported." + gcc_ver=$($CXX -dumpversion) + gcc_ver_num=$(echo $gcc_ver | sed 's/\./ /g' | xargs printf "%d%02d%02d") + if [ $gcc_ver_num -lt 40803 ]; then + echo "$0: Compiler '$CXX' (g++-$gcc_ver) is not supported." echo "$0: You need g++ >= 4.8.3, Apple clang >= 5.0 or LLVM clang >= 3.3." status=1 fi ;; "Apple LLVM "* ) # See https://gist.github.com/yamaya/2924292 - CLANG_VER=$(echo $COMPILER_VER_INFO | grep version | sed "s/.*version \([0-9\.]*\).*/\1/") - CLANG_VER_NUM=$(echo $COMPILER_VER_INFO | grep version | sed "s/.*clang-\([0-9]*\).*/\1/") - if [ $CLANG_VER_NUM -lt 500 ]; then - echo "$0: $CXX (Apple clang-$CLANG_VER) is not supported." + clang_ver=$(echo $compiler_ver_info | grep version | sed "s/.*version \([0-9\.]*\).*/\1/") + clang_ver_num=$(echo $compiler_ver_info | grep version | sed "s/.*clang-\([0-9]*\).*/\1/") + if [ $clang_ver_num -lt 500 ]; then + echo "$0: Compiler '$CXX' (Apple clang-$clang_ver) is not supported." echo "$0: You need g++ >= 4.8.3, Apple clang >= 5.0 or LLVM clang >= 3.3." status=1 fi ;; "clang "* ) - CLANG_VER=$(echo $COMPILER_VER_INFO | grep version | sed "s/.*version \([0-9\.]*\).*/\1/") - CLANG_VER_NUM=$(echo $CLANG_VER | sed 's/\./ /g' | xargs printf "%d%02d") - if [ $CLANG_VER_NUM -lt 303 ]; then - echo "$0: $CXX (LLVM clang-$CLANG_VER) is not supported." + clang_ver=$(echo $compiler_ver_info | grep version | sed "s/.*version \([0-9\.]*\).*/\1/") + clang_ver_num=$(echo $clang_ver | sed 's/\./ /g' | xargs printf "%d%02d") + if [ $clang_ver_num -lt 303 ]; then + echo "$0: Compiler '$CXX' (LLVM clang-$clang_ver) is not supported." echo "$0: You need g++ >= 4.8.3, Apple clang >= 5.0 or LLVM clang >= 3.3." status=1 fi @@ -61,53 +58,55 @@ case $COMPILER_VER_INFO in ;; esac -if ! echo "#include " | $CXX -E - >&/dev/null; then +# Cannot check this without a compiler. +if have "$CXX" && ! echo "#include " | $CXX -E - >&/dev/null; then echo "$0: zlib is not installed." - add_packages zlib-devel zlib1g-dev zlib-devel + add_packages zlib-devel zlib1g-dev fi for f in make automake autoconf patch grep bzip2 gzip unzip wget git sox; do - if ! which $f >&/dev/null; then + if ! have $f; then echo "$0: $f is not installed." - add_packages $f $f $f + add_packages $f fi done -if ! 
which libtoolize >&/dev/null && ! which glibtoolize >&/dev/null; then +if ! have libtoolize && ! have glibtoolize; then echo "$0: neither libtoolize nor glibtoolize is installed" - add_packages libtool libtool libtool + add_packages libtool fi -if ! which svn >&/dev/null; then +if ! have svn; then echo "$0: subversion is not installed" - add_packages subversion subversion subversion + add_packages subversion fi -if ! which awk >&/dev/null; then +if ! have awk; then echo "$0: awk is not installed" - add_packages gawk gawk gawk + add_packages gawk fi pythonok=true -if ! which python2.7 >&/dev/null; then +if ! have python2.7; then echo "$0: python2.7 is not installed" - add_packages python2.7 python2.7 + add_packages python2.7 pythonok=false fi -if ! which python3 >&/dev/null; then +if ! have python3; then echo "$0: python3 is not installed" - add_packages python3 python3 + add_packages python3 pythonok=false fi ( #Use a subshell so that sourcing env.sh does not have an influence on the rest of the script [ -f ./env.sh ] && . ./env.sh -if $pythonok && ! which python2 >&/dev/null; then +if $pythonok && ! have python2; then mkdir -p $PWD/python - echo "$0: python2.7 is installed, but the python2 binary does not exist. Creating a symlink and adding this to tools/env.sh" - ln -s $(which python2.7) $PWD/python/python2 + echo "$0: python2.7 is installed, but the python2 binary does not exist." \ + "Creating a symlink and adding this to tools/env.sh" + ln -s $(command -v python2.7) $PWD/python/python2 echo "export PATH=$PWD/python:\${PATH}" >> env.sh fi @@ -115,13 +114,15 @@ if [[ -f $PWD/python/.use_default_python && -f $PWD/python/python ]]; then rm $PWD/python/python fi -if $pythonok && which python >&/dev/null && [[ ! -f $PWD/python/.use_default_python ]]; then - version=`python 2>&1 --version | awk '{print $2}' ` +if $pythonok && have python && [[ ! -f $PWD/python/.use_default_python ]]; then + version=$(python 2>&1 --version | awk '{print $2}') if [[ $version != "2.7"* ]] ; then - echo "$0: WARNING python 2.7 is not the default python. We fixed this by adding a correct symlink more prominently on the path." - echo "$0: If you really want to use python $version as default, add an empty file $PWD/python/.use_default_python and run this script again." + echo "$0: WARNING python 2.7 is not the default python. We fixed this by" \ + "adding a correct symlink more prominently on the path." + echo " ... If you really want to use python $version as default, add an" \ + "empty file $PWD/python/.use_default_python and run this script again." mkdir -p $PWD/python - ln -s $(which python2.7) $PWD/python/python + ln -s $(command -v python2.7) $PWD/python/python echo "export PATH=$PWD/python:\${PATH}" >> env.sh fi fi @@ -129,66 +130,61 @@ fi printed=false -if which apt-get >&/dev/null && ! which zypper >/dev/null; then - # if we're using apt-get [but we're not OpenSuse, which uses zypper as the - # primary installer, but sometimes installs apt-get for some compatibility - # reason without it really working]... - if [ ! -z "$debian_packages" ]; then - echo "$0: we recommend that you run (our best guess):" - echo " sudo apt-get install $debian_packages" - printed=true - status=1 - fi - if ! dpkg -l | grep -E 'libatlas3gf|libatlas3-base' >/dev/null; then - echo "You should probably do: " - echo " sudo apt-get install libatlas3-base" - printed=true - fi -elif which yum >&/dev/null; then - if [ ! 
-z "$redhat_packages" ]; then - echo "$0: we recommend that you run (our best guess):" - echo " sudo yum install $redhat_packages" - printed=true - status=1 - fi - if ! rpm -qa| grep atlas >/dev/null; then - echo "You should probably do something like: " - echo "sudo yum install atlas.x86_64" - printed=true - fi -elif which zypper >&/dev/null; then - if [ ! -z "$opensuse_packages" ]; then - echo "$0: we recommend that you run (our best guess):" - echo " sudo zypper install $opensuse_packages" - printed=true - status=1 - fi - if ! zypper search -i | grep -E 'libatlas3|libatlas3-devel' >/dev/null; then - echo "You should probably do: " - echo "sudo zypper install libatlas3-devel" - printed=true +# MKL. We do not know if compiler exists at this point, so double-check +# the well-known mkl.h file location. The compiler test would still find +# it if installed in an alternative location (this is unlikely). +if [ ! -f /opt/intel/mkl/include/mkl.h ] && + ! echo '#include ' | $CXX -I /opt/intel/mkl/include -E - >&/dev/null; then + if [[ $(uname) == Linux ]]; then + echo "$0: Intel MKL is not installed. Run extras/install_mkl.sh to install it." + else + echo "$0: Intel MKL is not installed. Download the installer package for your + ... system from: https://software.intel.com/mkl/choose-download." fi + echo "\ + ... You can also use other matrix algebra libraries. For information, see: + ... http://kaldi-asr.org/doc/matrixwrap.html" + printed=true fi -if [ ! -z "$debian_packages" ]; then - # If the list of packages to be installed is nonempty, - # we'll exit with error status. Check this outside of - # checking for yum or apt-get, as we want it to exit with - # error even if we're not on Debian or red hat. +# Report missing programs and libraries. +if [ -n "$debian_packages" ]; then + install_pkg_command=$( + # Guess package manager from user's distribution type. Use a subshell + # because we are potentially importing a lot of dirt here. + eval $(grep 2>/dev/null ^ID /etc/os-release) 2>/dev/null + for rune in ${ID-} ${ID_LIKE-}; do + # The case '(pattern)' syntax is necessary in subshell for bash 3.x. + case $rune in + (rhel|centos|redhat) echo "yum install $redhat_packages"; break;; + (fedora) echo "dnx install $redhat_packages"; break;; + (suse) echo "zypper install $opensuse_packages"; break;; + (debian) echo "apt-get install $debian_packages"; break;; + esac + done + ) + + # Print the suggestion to install missing packages. + if [ -n "$install_pkg_command" ]; then + echo "$0: Some prerequisites are missing; install them using the command:" + echo " sudo" $install_pkg_command + else + echo "$0: The following prerequisites are missing; install them first:" + echo " " $debian_packages + fi status=1 fi - if [ $(pwd | wc -w) -gt 1 ]; then echo "*** $0: Warning: Kaldi scripts will fail if the directory name contains a space." echo "*** (it's OK if you just want to compile a few tools -> disable this check)." - status=1; + status=1 fi -if which grep >&/dev/null && pwd | grep -E 'JOB|LMWT' >/dev/null; then +if pwd | grep -E 'JOB|LMWT' >/dev/null; then echo "*** $0: Kaldi scripts will fail if the directory name contains" echo "*** either of the strings 'JOB' or 'LMWT'." - status=1; + status=1 fi if ! $printed && [ $status -eq 0 ]; then diff --git a/tools/extras/install_mkl.sh b/tools/extras/install_mkl.sh new file mode 100755 index 00000000000..fe2ea7bdb65 --- /dev/null +++ b/tools/extras/install_mkl.sh @@ -0,0 +1,265 @@ +#!/bin/bash + +# Intel MKL is now freely available even for commercial use. 
This script +# attempts to install the MKL package automatically from Intel's repository. +# +# For manual repository setup instructions, see: +# https://software.intel.com/articles/installing-intel-free-libs-and-python-yum-repo +# https://software.intel.com/articles/installing-intel-free-libs-and-python-apt-repo +# +# For other package managers, or non-Linux platforms, see: +# https://software.intel.com/mkl/choose-download + +set -o pipefail + +default_package=intel-mkl-64bit-2019.2-057 + +yum_repo='https://yum.repos.intel.com/mkl/setup/intel-mkl.repo' +apt_repo='https://apt.repos.intel.com/mkl' +intel_key_url='https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB' + +Usage () { + cat >&2 <] + +Checks if MKL is present on the system, and/or attempts to install it. + +If is not provided, ${default_package} will be installed. + +Intel packages are installed under the /opt/intel directory. You should be root +to install MKL into this directory; run this script using the sudo command. + +Options: + -s - Skip check for MKL being already present. + -p -- Force type of package management. Use only + if automatic detection fails, as instructed. + -h - Show this message. + +Environment: + CC The C compiler to use for MKL check. If not set, uses 'cc'. +EOF + exit 2 +} + +Fatal () { echo "$0: $@"; exit 1; } + +Have () { type -t "$1" >/dev/null; } + +# Option values. +skip_cc= +distro= + +while getopts ":hksp:" opt; do + case ${opt} in + h) Usage ;; + s) skip_cc=yes ;; + p) case $OPTARG in + suse|redhat|debian|fedora) distro=$OPTARG ;; + *) Fatal "invalid value -p '${OPTARG}'. " \ + "Allowed: 'suse', 'redhat', 'debian' or 'fedora'." + esac ;; + \?) echo >&2 "$0: invalid option -${OPTARG}."; Usage ;; + esac +done +shift $((OPTIND-1)) + +orig_arg_package=${1-''} +package=${1:-$default_package} + +# Check that we are actually on Linux, otherwise give a helpful reference. +[[ $(uname) == Linux ]] || Fatal "\ +This script can be used on Linux only, and your system is $(uname). + +Installer packages for Mac and Windows are available for download from Intel: +https://software.intel.com/mkl/choose-download" + +# Test if MKL is already installed on the system. +if [[ ! $skip_cc ]]; then + : ${CC:=cc} + Have "$CC" || Fatal "\ +C compiler $CC not found. + +You can skip the check for MKL presence by invoking this script with the '-s' +option to this script, but you will need a functional compiler anyway, so we +recommend that you install it first." + + mkl_version=$($CC -E -I /opt/intel/mkl/include - <<< \ + '#include + __INTEL_MKL__.__INTEL_MKL_MINOR__.__INTEL_MKL_UPDATE__' 2>/dev/null | + tail -n 1 ) || mkl_version= + mkl_version=${mkl_version// /} + + [[ $mkl_version ]] && Fatal "\ +MKL version $mkl_version is already installed. + +You can skip the check for MKL presence by invoking this script with the '-s' +option and proceed with automated installation, but we highly discourage +this. This script will register Intel repositories with your system, and it +seems that they have been already registered, or MKL has been installed some +other way. + +You should use your package manager to check which MKL package is already +installed. Note that Intel packages register the latest installed version of +the library as the default. If your installed version is older than +$package, it makes sense to upgrade." +fi + +# Try to determine which package manager the distro uses, unless overridden. +if [[ ! 
$distro ]]; then + dist_vars=$(cat /etc/os-release 2>/dev/null) + eval "$dist_vars" + for rune in $CPE_NAME $ID $ID_LIKE; do + case "$rune" in + cpe:/o:fedoraproject:fedora:2[01]) distro=redhat; break;; # Use yum. + rhel|centos) distro=redhat; break;; + redhat|suse|fedora|debian) distro=$rune; break;; + esac + done + + # Certain old distributions do not have /etc/os-release. We are unlikely to + # encounter these in the wild, but just in case. + # NOTE: Do not try to guess Fedora specifically here! Fedora 20 and below + # detect as redhat, and this is good, because they use yum by default. + [[ ! $distro && -f /etc/redhat-release ]] && distro=redhat + [[ ! $distro && -f /etc/SuSE-release ]] && distro=suse + [[ ! $distro && -f /etc/debian_release ]] && distro=debian + + [[ ! $distro ]] && Fatal "\ +Unable to determine package management style. + +Invoke this script with the option '-p