From 413826dfa156adb4d3b61cbb1fe685cef1af2eaa Mon Sep 17 00:00:00 2001 From: David Snyder Date: Tue, 23 Feb 2016 22:14:05 -0500 Subject: [PATCH 1/5] xvector: adding EER-based accuracy calculation to xvector diagnostics. --- egs/wsj/s5/steps/nnet3/xvector/train.sh | 8 +- src/xvector/nnet-xvector-diagnostics.cc | 130 +++++++++++++++++++++--- src/xvector/nnet-xvector-diagnostics.h | 11 +- src/xvector/nnet-xvector-training.cc | 9 +- src/xvector/xvector.cc | 3 + src/xvector/xvector.h | 1 + 6 files changed, 137 insertions(+), 25 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/xvector/train.sh b/egs/wsj/s5/steps/nnet3/xvector/train.sh index a05c62c5124..f3f218273c0 100755 --- a/egs/wsj/s5/steps/nnet3/xvector/train.sh +++ b/egs/wsj/s5/steps/nnet3/xvector/train.sh @@ -9,8 +9,8 @@ cmd=run.pl num_epochs=4 # Number of epochs of training; # the number of iterations is worked out from this. num_shifts=3 -initial_effective_lrate=0.0003 -final_effective_lrate=0.00003 +initial_effective_lrate=0.003 +final_effective_lrate=0.0003 num_jobs_initial=2 # Number of neural net jobs to run in parallel at the start of training num_jobs_final=8 # Number of neural net jobs to run in parallel at the end of training stage=-3 @@ -133,10 +133,10 @@ while [ $x -lt $num_iters ]; do # Set off jobs doing some diagnostics, in the background. # Use the egs dir from the previous iteration for the diagnostics $cmd JOB=1:$num_diagnostic_archives $dir/log/compute_prob_valid.$x.JOB.log \ - nnet3-xvector-compute-prob $dir/$x.raw \ + nnet3-xvector-compute-prob --compute-accuracy=true $dir/$x.raw \ "ark:nnet3-merge-egs --measure-output-frames=false ark:$egs_dir/valid_diagnostic_egs.JOB.ark ark:- |" & $cmd JOB=1:$num_diagnostic_archives $dir/log/compute_prob_train.$x.JOB.log \ - nnet3-xvector-compute-prob $dir/$x.raw \ + nnet3-xvector-compute-prob --compute-accuracy=true $dir/$x.raw \ "ark:nnet3-merge-egs --measure-output-frames=false ark:$egs_dir/train_diagnostic_egs.JOB.ark ark:- |" & if [ $x -gt 0 ]; then diff --git a/src/xvector/nnet-xvector-diagnostics.cc b/src/xvector/nnet-xvector-diagnostics.cc index 74f2ce2e6aa..5f60816b22e 100644 --- a/src/xvector/nnet-xvector-diagnostics.cc +++ b/src/xvector/nnet-xvector-diagnostics.cc @@ -36,6 +36,10 @@ NnetXvectorComputeProb::NnetXvectorComputeProb(const NnetComputeProbOptions &con bool is_gradient = true; // force simple update SetZero(is_gradient, deriv_nnet_); } + if (config_.compute_accuracy) + need_eer_threshold_ = true; + else + need_eer_threshold_ = false; } const Nnet &NnetXvectorComputeProb::GetDeriv() const { @@ -51,6 +55,7 @@ NnetXvectorComputeProb::~NnetXvectorComputeProb() { void NnetXvectorComputeProb::Reset() { num_minibatches_processed_ = 0; objf_info_.clear(); + acc_info_.clear(); if (deriv_nnet_) { bool is_gradient = true; SetZero(is_gradient, deriv_nnet_); @@ -80,46 +85,66 @@ void NnetXvectorComputeProb::ProcessOutputs(NnetComputer *computer) { if (nnet_.IsOutputNode(node_index)) { std::string xvector_name = nnet_.GetNodeName(node_index), s_name = "s", b_name = "b"; - if (nnet_.GetNodeIndex(s_name) == -1 || nnet_.GetNodeIndex(b_name) == -1) - KALDI_ERR << "The nnet expected to have two output nodes with name s and b."; + if (nnet_.GetNodeIndex(s_name) == -1 + || nnet_.GetNodeIndex(b_name) == -1) + KALDI_ERR << "Expected the nnet to have two output nodes with name " + << "s and b."; if (xvector_name != s_name && xvector_name != b_name) { - const CuMatrixBase &xvector_pairs = computer->GetOutput(xvector_name), - &xvec_s = computer->GetOutput(s_name), - &xvec_b = 
computer->GetOutput(b_name); - CuMatrix xvector_deriv(xvector_pairs.NumRows(), xvector_pairs.NumCols(), - kUndefined); - int32 s_dim = xvector_pairs.NumCols() * (xvector_pairs.NumCols() + 1) / 2; + const CuMatrixBase &xvector_pairs = computer->GetOutput( + xvector_name), + &xvec_s = computer->GetOutput( + s_name), + &xvec_b = computer->GetOutput( + b_name); + int32 num_rows = xvector_pairs.NumRows(), + dim_xvector = xvector_pairs.NumCols(); + int32 s_dim = dim_xvector * (dim_xvector + 1) / 2; + + CuMatrix xvector_deriv(num_rows, + dim_xvector, + kUndefined), + raw_scores(num_rows, num_rows); // convert CuVector to CuSpMatrix - CuSpMatrix xvec_s_sp(xvector_pairs.NumCols()); + CuSpMatrix xvec_s_sp(dim_xvector); xvec_s_sp.CopyFromVec(xvec_s.Row(0)); CuVector deriv_s(s_dim); BaseFloat xvec_b_val = xvec_b(0,0), deriv_b; BaseFloat tot_weight, tot_objf; bool supply_deriv = config_.compute_deriv; + bool compute_accuracy = config_.compute_accuracy; ComputeXvectorObjfAndDeriv(xvector_pairs, xvec_s_sp, xvec_b_val, (supply_deriv ? &xvector_deriv : NULL), (supply_deriv ? &deriv_s : NULL), (supply_deriv ? &deriv_b : NULL), + (compute_accuracy ? &raw_scores : NULL), &tot_objf, &tot_weight); if (supply_deriv) { CuMatrix deriv_s_mat(1, s_dim), - deriv_b_mat(1,1); + deriv_b_mat(1,1); deriv_b_mat(0,0) = deriv_b; deriv_s_mat.CopyRowsFromVec(deriv_s); computer->AcceptOutputDeriv(xvector_name, &xvector_deriv); computer->AcceptOutputDeriv(s_name, &deriv_s_mat); computer->AcceptOutputDeriv(b_name, &deriv_b_mat); - } + SimpleObjectiveInfo &totals = objf_info_[xvector_name]; totals.tot_weight += tot_weight; totals.tot_objective += tot_objf; + + if (compute_accuracy) { + BaseFloat tot_acc, tot_weight_acc; + SimpleObjectiveInfo &acc_totals = acc_info_[xvector_name]; + ComputeAccuracy(raw_scores, &tot_weight_acc, &tot_acc); + acc_totals.tot_objective += tot_weight_acc * tot_acc; + acc_totals.tot_weight += tot_weight_acc; + } + num_minibatches_processed_++; } - num_minibatches_processed_++; } } } @@ -140,15 +165,69 @@ bool NnetXvectorComputeProb::PrintTotalStats() const { KALDI_LOG << "Overall " << (obj_type == kLinear ? "log-likelihood" : "objective") << " for '" << name << "' is " - << (info.tot_objective / info.tot_weight) << " per frame" - << ", over " << info.tot_weight << " frames."; + << (info.tot_objective / info.tot_weight) << " per chunk" + << ", over " << info.tot_weight << " chunk."; if (info.tot_weight > 0) ans = true; } } + if (config_.compute_accuracy) { // now print equal error-rates. + iter = acc_info_.begin(); + end = acc_info_.end(); + for (; iter != end; ++iter) { + const std::string &name = iter->first; + const SimpleObjectiveInfo &info = iter->second; + KALDI_LOG << "Overall accuracy for '" << name << "' is " + << (info.tot_objective / info.tot_weight) << " per pair of chunks" + << ", over " << info.tot_weight << " pairs of chunks."; + // don't bother changing ans; the loop over the regular objective should + // already have set it to true if we got any data. 
+ } + } return ans; } +void NnetXvectorComputeProb::ComputeAccuracy( + const CuMatrixBase &raw_scores, + BaseFloat *tot_weight_out, + BaseFloat *tot_accuracy_out) { + int32 num_rows = raw_scores.NumCols(); + if (need_eer_threshold_) { + std::vector target_scores; + std::vector nontarget_scores; + for (int32 i = 0; i < num_rows; i++) { + for (int32 j = 0; j < num_rows; j++) { + if (i + 1 == j && i % 2 == 0) { + target_scores.push_back(raw_scores(i, j)); + } else if (i < j) { + nontarget_scores.push_back(raw_scores(i, j)); + } + } + } + (*tot_accuracy_out) = 1.0 - ComputeEer(&target_scores, &nontarget_scores); + (*tot_weight_out) = target_scores.size() + nontarget_scores.size(); + need_eer_threshold_ = false; + } else { + int32 count = 0, + error = 0; + for (int32 i = 0; i < num_rows; i++) { + for (int32 j = 0; j < num_rows; j++) { + if (i + 1 == j && i % 2 == 0) { + if (raw_scores(i, j) < eer_threshold_) + error++; + count++; + } else if (i < j) { + if (raw_scores(i, j) >= eer_threshold_) + error++; + count++; + } + } + } + (*tot_accuracy_out) = 1.0 - static_cast(error) / count; + (*tot_weight_out) = count; + } +} + const SimpleObjectiveInfo* NnetXvectorComputeProb::GetObjective( const std::string &output_name) const { unordered_map::const_iterator @@ -159,5 +238,28 @@ const SimpleObjectiveInfo* NnetXvectorComputeProb::GetObjective( return NULL; } +BaseFloat NnetXvectorComputeProb::ComputeEer(std::vector *target_scores, + std::vector *nontarget_scores) { + KALDI_ASSERT(!target_scores->empty() && !nontarget_scores->empty()); + std::sort(target_scores->begin(), target_scores->end()); + std::sort(nontarget_scores->begin(), nontarget_scores->end()); + + int32 target_position = 0, + target_size = target_scores->size(); + for (; target_position + 1 < target_size; target_position++) { + int32 nontarget_size = nontarget_scores->size(), + nontarget_n = nontarget_size * target_position * 1.0 / target_size, + nontarget_position = nontarget_size - 1 - nontarget_n; + if (nontarget_position < 0) + nontarget_position = 0; + if ((*nontarget_scores)[nontarget_position] < + (*target_scores)[target_position]) + break; + } + eer_threshold_ = (*target_scores)[target_position]; + BaseFloat eer = target_position * 1.0 / target_size; + return eer; +} + } // namespace nnet3 } // namespace kaldi diff --git a/src/xvector/nnet-xvector-diagnostics.h b/src/xvector/nnet-xvector-diagnostics.h index 046088518b1..7dff86270f4 100644 --- a/src/xvector/nnet-xvector-diagnostics.h +++ b/src/xvector/nnet-xvector-diagnostics.h @@ -71,6 +71,11 @@ class NnetXvectorComputeProb { ~NnetXvectorComputeProb(); private: void ProcessOutputs(NnetComputer *computer); + BaseFloat ComputeEer(std::vector *target_scores, + std::vector *nontarget_scores); + void ComputeAccuracy(const CuMatrixBase &raw_scores, + BaseFloat *tot_weight_out, + BaseFloat *tot_accuracy_out); NnetComputeProbOptions config_; const Nnet &nnet_; @@ -80,12 +85,12 @@ class NnetXvectorComputeProb { // this is only for diagnostics. int32 num_minibatches_processed_; - + bool need_eer_threshold_; + BaseFloat eer_threshold_; unordered_map objf_info_; - + unordered_map acc_info_; }; - } // namespace nnet3 } // namespace kaldi diff --git a/src/xvector/nnet-xvector-training.cc b/src/xvector/nnet-xvector-training.cc index 7327af90d45..dc512937882 100644 --- a/src/xvector/nnet-xvector-training.cc +++ b/src/xvector/nnet-xvector-training.cc @@ -130,6 +130,7 @@ void NnetXvectorTrainer::ProcessOutputs(NnetComputer *computer) { (supply_deriv ? &xvector_deriv : NULL), (supply_deriv ? 
&deriv_s : NULL), (supply_deriv ? &deriv_b : NULL), + NULL, &tot_objf, &tot_weight); @@ -269,7 +270,7 @@ void GetComputationRequestXvector(const Nnet &nnet, // We only need the output on frame t=0 for each n. int32 io_index_size = request->inputs[0].indexes.size(), n_indx_size = 0; - std::vector output_indexes, + std::vector output_indexes, affine_output_indexes; affine_output_indexes.resize(1); affine_output_indexes[0].n = 0; @@ -284,7 +285,7 @@ void GetComputationRequestXvector(const Nnet &nnet, output_indexes[indx].n = indx; output_indexes[indx].t = 0; } - + // In order to generate computation request for output nodes, // we should find output nodes and add io_spec for each one. int32 num_nodes = nnet.NumNodes(); @@ -294,8 +295,8 @@ void GetComputationRequestXvector(const Nnet &nnet, dest.resize(dest.size() + 1); IoSpecification &io_spec = dest.back(); io_spec.name = nnet.GetNodeName(node_index); - if (nnet.GetNodeName(node_index) == "s" || - nnet.GetNodeName(node_index) == "b") + if (nnet.GetNodeName(node_index) == "s" || + nnet.GetNodeName(node_index) == "b") io_spec.indexes = affine_output_indexes; else io_spec.indexes = output_indexes; diff --git a/src/xvector/xvector.cc b/src/xvector/xvector.cc index 604d70e9c14..f7a734e16b3 100644 --- a/src/xvector/xvector.cc +++ b/src/xvector/xvector.cc @@ -26,6 +26,7 @@ void ComputeXvectorObjfAndDeriv( const CuSpMatrix &S, BaseFloat b, CuMatrixBase *deriv_xvector, CuVector *deriv_S, BaseFloat *deriv_b, + CuMatrixBase *raw_scores, BaseFloat *tot_objf, BaseFloat *tot_weight) { @@ -61,6 +62,8 @@ void ComputeXvectorObjfAndDeriv( scores.AddMat(-1.0, R, kTrans); scores.AddMat(-1.0, R, kNoTrans); scores.Add(b); + if (raw_scores != NULL) + raw_scores->CopyFromMat(scores); cu::ComputeXvectorObjfFromScores(scores, &objf_terms, &objf_deriv_terms); CuVector objf_terms_vec(N); diff --git a/src/xvector/xvector.h b/src/xvector/xvector.h index 75083533acd..9ddc2d674fd 100644 --- a/src/xvector/xvector.h +++ b/src/xvector/xvector.h @@ -69,6 +69,7 @@ namespace kaldi { CuMatrixBase *deriv_xvector, CuVector *deriv_S, BaseFloat *deriv_b, + CuMatrixBase *raw_scores, BaseFloat *tot_objf, BaseFloat *tot_weight); } // namespace kaldi From acc4eebcc57dde0e7afe2fcf6af18d8eaf426b6f Mon Sep 17 00:00:00 2001 From: Pegita Date: Thu, 25 Feb 2016 17:14:19 -0500 Subject: [PATCH 2/5] fixed bugs and issues with xvector setup. --- egs/swbd/s5c/local/xvector/train.sh | 19 ++++++--- .../steps/nnet3/xvector/make_jesus_configs.py | 2 +- egs/wsj/s5/steps/nnet3/xvector/train.sh | 11 +++-- src/xvector/nnet-xvector-training.cc | 40 +++++++++++++------ src/xvector/xvector.cc | 3 ++ 5 files changed, 50 insertions(+), 25 deletions(-) diff --git a/egs/swbd/s5c/local/xvector/train.sh b/egs/swbd/s5c/local/xvector/train.sh index f0499ee5741..6dca8b99458 100755 --- a/egs/swbd/s5c/local/xvector/train.sh +++ b/egs/swbd/s5c/local/xvector/train.sh @@ -7,10 +7,13 @@ set -e stage=1 -train_stage=1 +train_stage=-10 generate_alignments=true # false if doing ctc training speed_perturb=true - +init_lr=0.003 +final_lr=0.0003 +max_change=2.0 +use_gpu=true feat_dim=40 # this is the MFCC dim we use in the hires features. you can't change it # unless you change local/xvector/prepare_perturbed_data.sh to use a different # MFCC config with a different dimension. @@ -18,6 +21,7 @@ data=data/train_nodup_sp_hires # you can't change this without changing # local/xvector/prepare_perturbed_data.sh xvector_dim=200 # dimension of the xVector. configurable. xvector_dir=exp/xvector_a +egs_dir=exp/xvector_a/egs . 
./path.sh @@ -40,18 +44,21 @@ if [ $stage -le 3 ]; then $xvector_dir/nnet.config fi -if [ $stage -le 4 ]; then +if [ $stage -le 4 ] && [ -z "$egs_dir" ]; then # dump egs. steps/nnet3/xvector/get_egs.sh --cmd "$train_cmd" \ - "$data" $xvector_dir/egs + "$data" $egs_dir fi if [ $stage -le 5 ]; then # training for 4 epochs * 3 shifts means we see each eg 12 # times (3 different frame-shifts of the same eg are counted as different). steps/nnet3/xvector/train.sh --cmd "$train_cmd" \ - --num-epochs 4 --num-shifts 3 \ - --num-jobs-initial 2 --num-jobs-final 8 \ + --num-epochs 4 --num-shifts 3 --use-gpu $use_gpu --stage $train_stage \ + --initial-effective-lrate $init_lr --final-effective-lrate $final_lr \ + --num-jobs-initial 1 --num-jobs-final 8 \ + --max-param-change $max_change \ + --egs-dir $egs_dir \ $xvector_dir fi diff --git a/egs/wsj/s5/steps/nnet3/xvector/make_jesus_configs.py b/egs/wsj/s5/steps/nnet3/xvector/make_jesus_configs.py index 51d58c5b89c..61eb2d41c24 100755 --- a/egs/wsj/s5/steps/nnet3/xvector/make_jesus_configs.py +++ b/egs/wsj/s5/steps/nnet3/xvector/make_jesus_configs.py @@ -271,7 +271,7 @@ def WriteConfigs(self, f): # just have an affine component for the first hidden layer. # we don't need a nonlinearity as there is one at the input of # the jesus component. - print('component name=x-affine1 type=AffineComponent ' + print('component name=x-affine1 type=NaturalGradientAffineComponent ' 'input-dim={0} output-dim={1} bias-stddev=0'.format( cur_dim, args.jesus_input_dim), file=f) print('component-node name=x-affine1 component=x-affine1 input={0}'.format( diff --git a/egs/wsj/s5/steps/nnet3/xvector/train.sh b/egs/wsj/s5/steps/nnet3/xvector/train.sh index a05c62c5124..f79c2680b1c 100755 --- a/egs/wsj/s5/steps/nnet3/xvector/train.sh +++ b/egs/wsj/s5/steps/nnet3/xvector/train.sh @@ -9,8 +9,8 @@ cmd=run.pl num_epochs=4 # Number of epochs of training; # the number of iterations is worked out from this. num_shifts=3 -initial_effective_lrate=0.0003 -final_effective_lrate=0.00003 +initial_effective_lrate=0.003 +final_effective_lrate=0.0003 num_jobs_initial=2 # Number of neural net jobs to run in parallel at the start of training num_jobs_final=8 # Number of neural net jobs to run in parallel at the end of training stage=-3 @@ -129,7 +129,7 @@ while [ $x -lt $num_iters ]; do if [ $stage -le $x ]; then echo "On iteration $x, learning rate is $this_learning_rate" - + raw="nnet3-copy --learning-rate=$this_learning_rate $dir/$x.raw - |" # Set off jobs doing some diagnostics, in the background. 
# Use the egs dir from the previous iteration for the diagnostics $cmd JOB=1:$num_diagnostic_archives $dir/log/compute_prob_valid.$x.JOB.log \ @@ -142,7 +142,7 @@ while [ $x -lt $num_iters ]; do if [ $x -gt 0 ]; then $cmd $dir/log/progress.$x.log \ nnet3-info $dir/$x.raw '&&' \ - nnet3-show-progress --use-gpu=no $dir/$[$x-1].raw $dir/$x.raw & + nnet3-show-progress --use-gpu=no $dir/$[$x-1].raw $dir/$x.raw & fi echo "Training neural net (pass $x)" @@ -174,8 +174,7 @@ while [ $x -lt $num_iters ]; do $cmd $train_queue_opt $dir/log/train.$x.$n.log \ nnet3-xvector-train $parallel_train_opts --print-interval=10 \ - --max-param-change=$max_param_change \ - $dir/$x.raw \ + --max-param-change=$max_param_change "$raw" \ "ark:nnet3-copy-egs ark:$egs_dir/egs.$archive.ark ark:- | nnet3-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x ark:- ark:-| nnet3-merge-egs --measure-output-frames=false --minibatch-size=$minibatch_size --discard-partial-minibatches=true ark:- ark:- |" \ $dir/$[$x+1].$n.raw || touch $dir/.error & done diff --git a/src/xvector/nnet-xvector-training.cc b/src/xvector/nnet-xvector-training.cc index 7327af90d45..5294879e69f 100644 --- a/src/xvector/nnet-xvector-training.cc +++ b/src/xvector/nnet-xvector-training.cc @@ -30,13 +30,14 @@ NnetXvectorTrainer::NnetXvectorTrainer(const NnetTrainerOptions &config, nnet_(nnet), compiler_(*nnet, config_.optimize_config), num_minibatches_processed_(0) { - if (config.zero_component_stats) + if (config_.zero_component_stats) ZeroComponentStats(nnet); - if (config.momentum == 0.0 && config.max_param_change == 0.0) { + if (config_.momentum == 0.0 && + config_.max_param_change == 0.0) { delta_nnet_= NULL; } else { - KALDI_ASSERT(config.momentum >= 0.0 && - config.max_param_change >= 0.0); + KALDI_ASSERT(config_.momentum >= 0.0 && + config_.max_param_change >= 0.0); delta_nnet_ = nnet_->Copy(); bool is_gradient = false; // setting this to true would disable the // natural-gradient updates. @@ -94,7 +95,8 @@ void NnetXvectorTrainer::Train(const NnetExample &eg) { ScaleNnet(config_.momentum, delta_nnet_); } if (config_.write_cache != "") { - Output ko(config_.write_cache, config_.binary_write_cache); + Output ko(config_.write_cache, + config_.binary_write_cache); compiler_.WriteCache(ko.Stream(), config_.binary_write_cache); } } @@ -143,7 +145,8 @@ void NnetXvectorTrainer::ProcessOutputs(NnetComputer *computer) { computer->AcceptOutputDeriv(b_name, &deriv_b_mat); } - objf_info_[xvector_name].UpdateStats(xvector_name, config_.print_interval, + objf_info_[xvector_name].UpdateStats(xvector_name, + config_.print_interval, num_minibatches_processed_++, tot_weight, tot_objf); } @@ -246,7 +249,7 @@ void GetComputationRequestXvector(const Nnet &nnet, request->need_model_derivative = need_model_derivative; request->store_component_stats = store_component_stats; - // xvector-egs have multiple inputs(e.g. different inputs correspond + // xvector-egs has multiple inputs(e.g. different inputs correspond // to different chunks and no outputs. for (size_t i = 0; i < eg.io.size(); i++) { const NnetIo &io = eg.io[i]; @@ -263,21 +266,34 @@ void GetComputationRequestXvector(const Nnet &nnet, IoSpecification &io_spec = dest.back(); io_spec.name = name; io_spec.indexes = io.indexes; - io_spec.has_deriv = nnet.IsOutputNode(node_index) && need_model_derivative; + io_spec.has_deriv = false; } // We only need the output on frame t=0 for each n. 
+ // So the output index for output node is (n, 0, 0) + // for n = 0,.., min number of n-values for different t + // in input indexes. + // indexes for "s" and "b" output nodes are equal to (0,0,0). int32 io_index_size = request->inputs[0].indexes.size(), - n_indx_size = 0; + n_indx_size = 1e6, t_ind; std::vector output_indexes, affine_output_indexes; affine_output_indexes.resize(1); affine_output_indexes[0].n = 0; affine_output_indexes[0].t = 0; + + std::map n_indx_sizes; + for (int32 indx = 0; indx < io_index_size; indx++) { + t_ind = request->inputs[0].indexes[indx].t; + if (n_indx_sizes.count(t_ind) != 0) + n_indx_sizes[t_ind] += 1; + else + n_indx_sizes.insert(std::make_pair(t_ind, 1)); + } + std::map::const_iterator iter; + for (iter = n_indx_sizes.begin(); iter != n_indx_sizes.end(); iter++) + n_indx_size = std::min(n_indx_size, iter->second); - for (int32 indx = 0; indx < io_index_size; indx++) - if (request->inputs[0].indexes[indx].t == 0) - n_indx_size++; output_indexes.resize(n_indx_size); for (int32 indx = 0; indx < n_indx_size; indx++) { diff --git a/src/xvector/xvector.cc b/src/xvector/xvector.cc index 604d70e9c14..aab825ba60b 100644 --- a/src/xvector/xvector.cc +++ b/src/xvector/xvector.cc @@ -40,6 +40,9 @@ void ComputeXvectorObjfAndDeriv( KALDI_ASSERT(deriv_xvector->NumCols() == xvector_dim); KALDI_ASSERT(deriv_xvector->NumRows() == N); KALDI_ASSERT(deriv_S->Dim() == S_dim); + deriv_xvector->Set(0.0); + deriv_S->Set(0.0); + (*deriv_b) = 0.0; } CuMatrix S_tmp(S), From 862f964b91880cffbdcb5cb9710689e23a0ae8dd Mon Sep 17 00:00:00 2001 From: David Snyder Date: Thu, 25 Feb 2016 21:22:03 -0500 Subject: [PATCH 3/5] xvector: resolving merge issues --- .../local/xvector/prepare_perturbed_data.sh | 9 ++++---- egs/swbd/s5c/local/xvector/train.sh | 21 +++++++++++++------ egs/wsj/s5/steps/nnet3/xvector/train.sh | 2 +- src/xvector/nnet-xvector-diagnostics.cc | 2 +- 4 files changed, 22 insertions(+), 12 deletions(-) diff --git a/egs/swbd/s5c/local/xvector/prepare_perturbed_data.sh b/egs/swbd/s5c/local/xvector/prepare_perturbed_data.sh index 7ce4d553733..ca11fe2f283 100755 --- a/egs/swbd/s5c/local/xvector/prepare_perturbed_data.sh +++ b/egs/swbd/s5c/local/xvector/prepare_perturbed_data.sh @@ -10,6 +10,8 @@ stage=1 train_stage=-10 generate_alignments=true # false if doing ctc training speed_perturb=true +mfcc_config=conf/mfcc_hires.conf +mfccdir=mfcc . ./path.sh . ./utils/parse_options.sh @@ -27,13 +29,12 @@ if [ $stage -le 1 ]; then if [ -f data/${datadir}_sp_hires/feats.scp ]; then echo "$0: directory data/${datadir}_sp_hires/feats.scp already exists, skipping creating it." else - mfccdir=mfcc utils/copy_data_dir.sh data/${datadir}_sp data/${datadir}_sp_hires - steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 \ + steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 --mfcc-config $mfcc_config \ data/${datadir}_sp_hires exp/make_mfcc/${datadir}_sp_hires $mfccdir || exit 1; # we typically won't need the cmvn stats when using hires features-- it's # mostly for neural nets. 
- utils/fix_data_dir.sh data/${dataset}_sp_hires # remove segments with problems + utils/fix_data_dir.sh data/${datadir}_sp_hires # remove segments with problems fi done fi @@ -50,7 +51,7 @@ if [ $stage -le 2 ]; then echo "$0: data/${dataset}_hires/feats.scp already exists, skipping mfcc generation" else utils/copy_data_dir.sh data/$dataset data/${dataset}_hires - steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 --mfcc-config conf/mfcc_hires.conf \ + steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 --mfcc-config $mfcc_config \ data/${dataset}_hires exp/make_hires/$dataset $mfccdir; steps/compute_cmvn_stats.sh data/${dataset}_hires exp/make_hires/$dataset $mfccdir; utils/fix_data_dir.sh data/${dataset}_hires # remove segments with problems diff --git a/egs/swbd/s5c/local/xvector/train.sh b/egs/swbd/s5c/local/xvector/train.sh index f0499ee5741..64622f5dc2d 100755 --- a/egs/swbd/s5c/local/xvector/train.sh +++ b/egs/swbd/s5c/local/xvector/train.sh @@ -7,10 +7,13 @@ set -e stage=1 -train_stage=1 +train_stage=-10 generate_alignments=true # false if doing ctc training speed_perturb=true - +init_lr=0.003 +final_lr=0.0003 +max_change=2.0 +use_gpu=true feat_dim=40 # this is the MFCC dim we use in the hires features. you can't change it # unless you change local/xvector/prepare_perturbed_data.sh to use a different # MFCC config with a different dimension. @@ -18,6 +21,7 @@ data=data/train_nodup_sp_hires # you can't change this without changing # local/xvector/prepare_perturbed_data.sh xvector_dim=200 # dimension of the xVector. configurable. xvector_dir=exp/xvector_a +egs_dir=exp/xvector_a/egs . ./path.sh @@ -33,6 +37,8 @@ if [ $stage -le 3 ]; then $train_cmd $xvector_dir/log/make_configs.log \ steps/nnet3/xvector/make_jesus_configs.py \ + --jesus-stddev-scale=1.0 \ + --s-scale=0.05 --b-scale=0.05 --output-scale=0.05 \ --splice-indexes="-1,0,1 -2,-1,0,1 -3,0,3 mean+stddev+count(-99:3:9:0) 0" \ --feat-dim $feat_dim --output-dim $xvector_dim \ --num-jesus-blocks 100 \ @@ -40,18 +46,21 @@ if [ $stage -le 3 ]; then $xvector_dir/nnet.config fi -if [ $stage -le 4 ]; then +if [ $stage -le 4 ] && [ -z "$egs_dir" ]; then # dump egs. steps/nnet3/xvector/get_egs.sh --cmd "$train_cmd" \ - "$data" $xvector_dir/egs + "$data" $egs_dir fi if [ $stage -le 5 ]; then # training for 4 epochs * 3 shifts means we see each eg 12 # times (3 different frame-shifts of the same eg are counted as different). 
steps/nnet3/xvector/train.sh --cmd "$train_cmd" \ - --num-epochs 4 --num-shifts 3 \ - --num-jobs-initial 2 --num-jobs-final 8 \ + --num-epochs 4 --num-shifts 3 --use-gpu $use_gpu --stage $train_stage \ + --initial-effective-lrate $init_lr --final-effective-lrate $final_lr \ + --num-jobs-initial 1 --num-jobs-final 8 \ + --max-param-change $max_change \ + --egs-dir $egs_dir \ $xvector_dir fi diff --git a/egs/wsj/s5/steps/nnet3/xvector/train.sh b/egs/wsj/s5/steps/nnet3/xvector/train.sh index 44ca0e18015..1377728219c 100755 --- a/egs/wsj/s5/steps/nnet3/xvector/train.sh +++ b/egs/wsj/s5/steps/nnet3/xvector/train.sh @@ -175,7 +175,7 @@ while [ $x -lt $num_iters ]; do $cmd $train_queue_opt $dir/log/train.$x.$n.log \ nnet3-xvector-train $parallel_train_opts --print-interval=10 \ - --max-param-change=$max_param_change --diss-scale=$diss_scale "$raw" \ + --max-param-change=$max_param_change "$raw" \ "ark:nnet3-copy-egs ark:$egs_dir/egs.$archive.ark ark:- | nnet3-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x ark:- ark:-| nnet3-merge-egs --measure-output-frames=false --minibatch-size=$minibatch_size --discard-partial-minibatches=true ark:- ark:- |" \ $dir/$[$x+1].$n.raw || touch $dir/.error & done diff --git a/src/xvector/nnet-xvector-diagnostics.cc b/src/xvector/nnet-xvector-diagnostics.cc index 119c261b7d4..dce3072f85d 100644 --- a/src/xvector/nnet-xvector-diagnostics.cc +++ b/src/xvector/nnet-xvector-diagnostics.cc @@ -166,7 +166,7 @@ bool NnetXvectorComputeProb::PrintTotalStats() const { << (obj_type == kLinear ? "log-likelihood" : "objective") << " for '" << name << "' is " << (info.tot_objective / info.tot_weight) << " per chunk" - << ", over " << info.tot_weight << " chunk."; + << ", over " << info.tot_weight << " chunks."; if (info.tot_weight > 0) ans = true; } From 20bb518b3073edfef8f680a9ae63fc894891129b Mon Sep 17 00:00:00 2001 From: David Snyder Date: Thu, 25 Feb 2016 21:41:14 -0500 Subject: [PATCH 4/5] xvector: fixing merge problems --- egs/swbd/s5c/local/xvector/train.sh | 2 -- egs/wsj/s5/steps/nnet3/xvector/train.sh | 1 - src/xvector/nnet-xvector-training.cc | 26 ++++++++++++++++--------- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/egs/swbd/s5c/local/xvector/train.sh b/egs/swbd/s5c/local/xvector/train.sh index 64622f5dc2d..6dca8b99458 100755 --- a/egs/swbd/s5c/local/xvector/train.sh +++ b/egs/swbd/s5c/local/xvector/train.sh @@ -37,8 +37,6 @@ if [ $stage -le 3 ]; then $train_cmd $xvector_dir/log/make_configs.log \ steps/nnet3/xvector/make_jesus_configs.py \ - --jesus-stddev-scale=1.0 \ - --s-scale=0.05 --b-scale=0.05 --output-scale=0.05 \ --splice-indexes="-1,0,1 -2,-1,0,1 -3,0,3 mean+stddev+count(-99:3:9:0) 0" \ --feat-dim $feat_dim --output-dim $xvector_dim \ --num-jesus-blocks 100 \ diff --git a/egs/wsj/s5/steps/nnet3/xvector/train.sh b/egs/wsj/s5/steps/nnet3/xvector/train.sh index 1377728219c..bbdeefc6562 100755 --- a/egs/wsj/s5/steps/nnet3/xvector/train.sh +++ b/egs/wsj/s5/steps/nnet3/xvector/train.sh @@ -8,7 +8,6 @@ cmd=run.pl num_epochs=4 # Number of epochs of training; # the number of iterations is worked out from this. -diss_scale=1.0 # scale value used to scale the dissimalarity part in objective function. 
num_shifts=3 initial_effective_lrate=0.003 final_effective_lrate=0.0003 diff --git a/src/xvector/nnet-xvector-training.cc b/src/xvector/nnet-xvector-training.cc index dc512937882..f7fb430ad2c 100644 --- a/src/xvector/nnet-xvector-training.cc +++ b/src/xvector/nnet-xvector-training.cc @@ -30,13 +30,13 @@ NnetXvectorTrainer::NnetXvectorTrainer(const NnetTrainerOptions &config, nnet_(nnet), compiler_(*nnet, config_.optimize_config), num_minibatches_processed_(0) { - if (config.zero_component_stats) + if (config_.zero_component_stats) ZeroComponentStats(nnet); if (config.momentum == 0.0 && config.max_param_change == 0.0) { delta_nnet_= NULL; } else { - KALDI_ASSERT(config.momentum >= 0.0 && - config.max_param_change >= 0.0); + KALDI_ASSERT(config_.momentum >= 0.0 && + config_.max_param_change >= 0.0); delta_nnet_ = nnet_->Copy(); bool is_gradient = false; // setting this to true would disable the // natural-gradient updates. @@ -247,7 +247,7 @@ void GetComputationRequestXvector(const Nnet &nnet, request->need_model_derivative = need_model_derivative; request->store_component_stats = store_component_stats; - // xvector-egs have multiple inputs(e.g. different inputs correspond + // xvector-egs has multiple inputs(e.g. different inputs correspond // to different chunks and no outputs. for (size_t i = 0; i < eg.io.size(); i++) { const NnetIo &io = eg.io[i]; @@ -264,21 +264,29 @@ void GetComputationRequestXvector(const Nnet &nnet, IoSpecification &io_spec = dest.back(); io_spec.name = name; io_spec.indexes = io.indexes; - io_spec.has_deriv = nnet.IsOutputNode(node_index) && need_model_derivative; + io_spec.has_deriv = false; } // We only need the output on frame t=0 for each n. int32 io_index_size = request->inputs[0].indexes.size(), - n_indx_size = 0; + n_indx_size = 1e6, t_ind; std::vector output_indexes, affine_output_indexes; affine_output_indexes.resize(1); affine_output_indexes[0].n = 0; affine_output_indexes[0].t = 0; - for (int32 indx = 0; indx < io_index_size; indx++) - if (request->inputs[0].indexes[indx].t == 0) - n_indx_size++; + std::map n_indx_sizes; + for (int32 indx = 0; indx < io_index_size; indx++) { + t_ind = request->inputs[0].indexes[indx].t; + if (n_indx_sizes.count(t_ind) != 0) + n_indx_sizes[t_ind] += 1; + else + n_indx_sizes.insert(std::make_pair(t_ind, 1)); + } + std::map::const_iterator iter; + for (iter = n_indx_sizes.begin(); iter != n_indx_sizes.end(); iter++) + n_indx_size = std::min(n_indx_size, iter->second); output_indexes.resize(n_indx_size); for (int32 indx = 0; indx < n_indx_size; indx++) { From cd58d06fb50ae5782256f1ea3c0f43525b0c44ab Mon Sep 17 00:00:00 2001 From: David Snyder Date: Thu, 25 Feb 2016 21:47:00 -0500 Subject: [PATCH 5/5] xvector: resolving merge issues --- src/xvector/nnet-xvector-diagnostics.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/xvector/nnet-xvector-diagnostics.cc b/src/xvector/nnet-xvector-diagnostics.cc index dce3072f85d..6648983b18e 100644 --- a/src/xvector/nnet-xvector-diagnostics.cc +++ b/src/xvector/nnet-xvector-diagnostics.cc @@ -101,10 +101,9 @@ void NnetXvectorComputeProb::ProcessOutputs(NnetComputer *computer) { dim_xvector = xvector_pairs.NumCols(); int32 s_dim = dim_xvector * (dim_xvector + 1) / 2; - CuMatrix xvector_deriv(num_rows, - dim_xvector, + CuMatrix xvector_deriv(num_rows, dim_xvector, kUndefined), - raw_scores(num_rows, num_rows); + raw_scores(num_rows, num_rows, kUndefined); // convert CuVector to CuSpMatrix CuSpMatrix xvec_s_sp(dim_xvector);
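
A note on the EER-based accuracy added in patch 1/5: ComputeEer sorts the target and nontarget score lists, then scans up the sorted targets; position i corresponds to a miss rate of i / num_targets, and the matching position in the nontarget list gives the false-alarm rate. The scan stops where the false-alarm rate drops below the miss rate, and the target score at that point becomes eer_threshold_ (need_eer_threshold_ is then cleared, so later diagnostic minibatches reuse the threshold rather than re-estimating it). Below is a minimal standalone sketch of the same search; it uses std::vector in place of Kaldi types, returns the threshold through an out-parameter instead of a member, and the toy scores in main() are made up for illustration.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <vector>

// Mirrors the search in NnetXvectorComputeProb::ComputeEer: sort both
// score lists, then walk up the targets until the false-alarm rate of
// the matching nontarget position falls below the miss rate.
float ComputeEer(std::vector<float> *target_scores,
                 std::vector<float> *nontarget_scores,
                 float *threshold) {
  assert(!target_scores->empty() && !nontarget_scores->empty());
  std::sort(target_scores->begin(), target_scores->end());
  std::sort(nontarget_scores->begin(), nontarget_scores->end());
  size_t target_position = 0,
         target_size = target_scores->size(),
         nontarget_size = nontarget_scores->size();
  for (; target_position + 1 < target_size; target_position++) {
    // Miss rate at this position is target_position / target_size;
    // nontarget_position marks the same rate from the top of the
    // nontarget list (it cannot go below zero, since
    // nontarget_n < nontarget_size whenever target_position < target_size).
    size_t nontarget_n = nontarget_size * target_position / target_size,
           nontarget_position = nontarget_size - 1 - nontarget_n;
    if ((*nontarget_scores)[nontarget_position] <
        (*target_scores)[target_position])
      break;
  }
  *threshold = (*target_scores)[target_position];
  return static_cast<float>(target_position) / target_size;
}

int main() {
  // Toy scores (made up): targets mostly score higher than nontargets.
  std::vector<float> targets = {2.1f, 1.7f, 0.4f, 2.8f, 1.2f};
  std::vector<float> nontargets = {-1.5f, 0.9f, -0.2f, -2.0f, 0.1f, -0.7f};
  float threshold;
  float eer = ComputeEer(&targets, &nontargets, &threshold);
  std::printf("EER = %.2f at threshold %.2f\n", eer, threshold);
  return 0;
}

As in the patch, the estimate is the miss rate at the crossing point, so its resolution is limited to 1 / num_targets; that is adequate for a diagnostic printed once per iteration.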
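
The accuracy pass over raw_scores encodes an assumption about how the diagnostic egs are ordered: the condition (i + 1 == j && i % 2 == 0) treats rows (0,1), (2,3), ... as same-speaker (target) pairs and every other pair with i < j as an impostor trial, and a pair counts as an error when its score lands on the wrong side of eer_threshold_. The following is a hedged sketch of that decision rule only, with a plain upper-triangular matrix standing in for the CuMatrixBase the patch uses; the helper name and the 4x4 example are illustrative, not from the patch.

#include <cassert>
#include <cstdio>
#include <vector>

// Applies the patch's decision rule to an N x N score matrix: accept a
// pair when its score is >= threshold, and count an error whenever the
// decision disagrees with the assumed labelling (rows 2k and 2k+1 come
// from the same speaker; all other pairs are impostor trials).
float ComputeAccuracy(const std::vector<std::vector<float> > &scores,
                      float threshold) {
  int num_rows = static_cast<int>(scores.size());
  assert(num_rows >= 2);
  int count = 0, errors = 0;
  for (int i = 0; i < num_rows; i++) {
    for (int j = i + 1; j < num_rows; j++) {  // upper triangle only
      bool is_target = (j == i + 1 && i % 2 == 0);
      bool accepted = (scores[i][j] >= threshold);
      if (accepted != is_target)
        errors++;
      count++;
    }
  }
  return 1.0f - static_cast<float>(errors) / count;
}

int main() {
  // Four chunks: rows (0,1) share a speaker, rows (2,3) share another;
  // only the upper triangle is read.
  std::vector<std::vector<float> > scores = {
      {0.0f, 1.9f, -0.8f, -1.1f},
      {0.0f, 0.0f, -0.5f, -1.4f},
      {0.0f, 0.0f,  0.0f,  2.3f},
      {0.0f, 0.0f,  0.0f,  0.0f}};
  std::printf("accuracy = %.2f\n", ComputeAccuracy(scores, 0.0f));
  return 0;
}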
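
On the GetComputationRequestXvector change made in patch 2/5 and repeated in patch 4/5: the old code sized the output by counting only the input indexes with t == 0, which undercounts when some chunks do not contain frame t = 0. The new code tallies how many input indexes share each t value and takes the minimum over t as the number of output rows. The sketch below reproduces just that counting; the flat (n, t) index list, the simplified Index struct, and the helper name NumOutputRows are assumptions made here for illustration, not names from the patch.

#include <algorithm>
#include <cstdio>
#include <limits>
#include <map>
#include <vector>

struct Index { int n, t; };  // simplified stand-in for nnet3's Index

// Counts how many input indexes share each t value and returns the
// smallest such count over all t: the minimum number of n-values
// present at any frame offset, which the patch uses to size the output.
int NumOutputRows(const std::vector<Index> &input_indexes) {
  std::map<int, int> counts_per_t;
  for (size_t i = 0; i < input_indexes.size(); i++)
    counts_per_t[input_indexes[i].t]++;
  int min_count = std::numeric_limits<int>::max();
  std::map<int, int>::const_iterator iter;
  for (iter = counts_per_t.begin(); iter != counts_per_t.end(); ++iter)
    min_count = std::min(min_count, iter->second);
  return min_count;  // caller must ensure input_indexes is non-empty
}

int main() {
  // Two chunks (n = 0, 1) over frames t = -1..1; n = 1 is missing t = 1,
  // so only one output row is counted.
  Index idx[] = { {0, -1}, {0, 0}, {0, 1}, {1, -1}, {1, 0} };
  std::vector<Index> indexes(idx, idx + 5);
  std::printf("num output rows = %d\n", NumOutputRows(indexes));
  return 0;
}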