diff --git a/egs/swbd/s5c/local/xvector/prepare_perturbed_data.sh b/egs/swbd/s5c/local/xvector/prepare_perturbed_data.sh
index 7ce4d553733..ca11fe2f283 100755
--- a/egs/swbd/s5c/local/xvector/prepare_perturbed_data.sh
+++ b/egs/swbd/s5c/local/xvector/prepare_perturbed_data.sh
@@ -10,6 +10,8 @@ stage=1
 train_stage=-10
 generate_alignments=true # false if doing ctc training
 speed_perturb=true
+mfcc_config=conf/mfcc_hires.conf
+mfccdir=mfcc
 
 . ./path.sh
 . ./utils/parse_options.sh
@@ -27,13 +29,12 @@ if [ $stage -le 1 ]; then
     if [ -f data/${datadir}_sp_hires/feats.scp ]; then
       echo "$0: directory data/${datadir}_sp_hires/feats.scp already exists, skipping creating it."
     else
-      mfccdir=mfcc
       utils/copy_data_dir.sh data/${datadir}_sp data/${datadir}_sp_hires
-      steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 \
+      steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 --mfcc-config $mfcc_config \
        data/${datadir}_sp_hires exp/make_mfcc/${datadir}_sp_hires $mfccdir || exit 1;
       # we typically won't need the cmvn stats when using hires features-- it's
       # mostly for neural nets.
-      utils/fix_data_dir.sh data/${dataset}_sp_hires # remove segments with problems
+      utils/fix_data_dir.sh data/${datadir}_sp_hires # remove segments with problems
     fi
   done
 fi
@@ -50,7 +51,7 @@ if [ $stage -le 2 ]; then
     echo "$0: data/${dataset}_hires/feats.scp already exists, skipping mfcc generation"
   else
     utils/copy_data_dir.sh data/$dataset data/${dataset}_hires
-    steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 --mfcc-config conf/mfcc_hires.conf \
+    steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 --mfcc-config $mfcc_config \
      data/${dataset}_hires exp/make_hires/$dataset $mfccdir;
     steps/compute_cmvn_stats.sh data/${dataset}_hires exp/make_hires/$dataset $mfccdir;
     utils/fix_data_dir.sh data/${dataset}_hires # remove segments with problems
diff --git a/egs/swbd/s5c/local/xvector/train.sh b/egs/swbd/s5c/local/xvector/train.sh
index f0499ee5741..6dca8b99458 100755
--- a/egs/swbd/s5c/local/xvector/train.sh
+++ b/egs/swbd/s5c/local/xvector/train.sh
@@ -7,10 +7,13 @@ set -e
 
 stage=1
-train_stage=1
+train_stage=-10
 generate_alignments=true # false if doing ctc training
 speed_perturb=true
-
+init_lr=0.003
+final_lr=0.0003
+max_change=2.0
+use_gpu=true
 feat_dim=40 # this is the MFCC dim we use in the hires features. you can't change it
             # unless you change local/xvector/prepare_perturbed_data.sh to use a different
             # MFCC config with a different dimension.
@@ -18,6 +21,7 @@ data=data/train_nodup_sp_hires # you can't change this without changing
                                # local/xvector/prepare_perturbed_data.sh
 xvector_dim=200 # dimension of the xVector. configurable.
 xvector_dir=exp/xvector_a
+egs_dir=exp/xvector_a/egs
 
 . ./path.sh
@@ -40,18 +44,21 @@ if [ $stage -le 3 ]; then
     $xvector_dir/nnet.config
 fi
 
-if [ $stage -le 4 ]; then
+# skip dumping egs if they are already present in $egs_dir.
+if [ $stage -le 4 ] && [ ! -f $egs_dir/egs.1.ark ]; then
   # dump egs.
   steps/nnet3/xvector/get_egs.sh --cmd "$train_cmd" \
-    "$data" $xvector_dir/egs
+    "$data" $egs_dir
 fi
 
 if [ $stage -le 5 ]; then
   # training for 4 epochs * 3 shifts means we see each eg 12
   # times (3 different frame-shifts of the same eg are counted as different).
   steps/nnet3/xvector/train.sh --cmd "$train_cmd" \
-    --num-epochs 4 --num-shifts 3 \
-    --num-jobs-initial 2 --num-jobs-final 8 \
+    --num-epochs 4 --num-shifts 3 --use-gpu $use_gpu --stage $train_stage \
+    --initial-effective-lrate $init_lr --final-effective-lrate $final_lr \
+    --num-jobs-initial 1 --num-jobs-final 8 \
+    --max-param-change $max_change \
+    --egs-dir $egs_dir \
     $xvector_dir
 fi
diff --git a/egs/wsj/s5/steps/nnet3/xvector/train.sh b/egs/wsj/s5/steps/nnet3/xvector/train.sh
index e6ee22cfb16..bbdeefc6562 100755
--- a/egs/wsj/s5/steps/nnet3/xvector/train.sh
+++ b/egs/wsj/s5/steps/nnet3/xvector/train.sh
@@ -8,7 +8,6 @@ cmd=run.pl
 num_epochs=4   # Number of epochs of training;
                # the number of iterations is worked out from this.
-diss_scale=1.0 # scale value used to scale the dissimalarity part in objective function.
 num_shifts=3
 initial_effective_lrate=0.003
 final_effective_lrate=0.0003
@@ -134,10 +133,10 @@ while [ $x -lt $num_iters ]; do
     # Set off jobs doing some diagnostics, in the background.
     # Use the egs dir from the previous iteration for the diagnostics
     $cmd JOB=1:$num_diagnostic_archives $dir/log/compute_prob_valid.$x.JOB.log \
-      nnet3-xvector-compute-prob $dir/$x.raw \
+      nnet3-xvector-compute-prob --compute-accuracy=true $dir/$x.raw \
       "ark:nnet3-merge-egs --measure-output-frames=false ark:$egs_dir/valid_diagnostic_egs.JOB.ark ark:- |" &
     $cmd JOB=1:$num_diagnostic_archives $dir/log/compute_prob_train.$x.JOB.log \
-      nnet3-xvector-compute-prob $dir/$x.raw \
+      nnet3-xvector-compute-prob --compute-accuracy=true $dir/$x.raw \
       "ark:nnet3-merge-egs --measure-output-frames=false ark:$egs_dir/train_diagnostic_egs.JOB.ark ark:- |" &
 
     if [ $x -gt 0 ]; then
@@ -175,7 +174,7 @@ while [ $x -lt $num_iters ]; do
 
       $cmd $train_queue_opt $dir/log/train.$x.$n.log \
         nnet3-xvector-train $parallel_train_opts --print-interval=10 \
-        --max-param-change=$max_param_change --diss-scale=$diss_scale "$raw" \
+        --max-param-change=$max_param_change "$raw" \
         "ark:nnet3-copy-egs ark:$egs_dir/egs.$archive.ark ark:- | nnet3-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x ark:- ark:-| nnet3-merge-egs --measure-output-frames=false --minibatch-size=$minibatch_size --discard-partial-minibatches=true ark:- ark:- |" \
         $dir/$[$x+1].$n.raw || touch $dir/.error &
     done
diff --git a/src/xvector/nnet-xvector-diagnostics.cc b/src/xvector/nnet-xvector-diagnostics.cc
index 74f2ce2e6aa..6648983b18e 100644
--- a/src/xvector/nnet-xvector-diagnostics.cc
+++ b/src/xvector/nnet-xvector-diagnostics.cc
@@ -36,6 +36,10 @@ NnetXvectorComputeProb::NnetXvectorComputeProb(const NnetComputeProbOptions &config,
     bool is_gradient = true;  // force simple update
     SetZero(is_gradient, deriv_nnet_);
   }
+  if (config_.compute_accuracy)
+    need_eer_threshold_ = true;
+  else
+    need_eer_threshold_ = false;
 }
 
 const Nnet &NnetXvectorComputeProb::GetDeriv() const {
@@ -51,6 +55,7 @@ NnetXvectorComputeProb::~NnetXvectorComputeProb() {
 void NnetXvectorComputeProb::Reset() {
   num_minibatches_processed_ = 0;
   objf_info_.clear();
+  acc_info_.clear();
   if (deriv_nnet_) {
     bool is_gradient = true;
     SetZero(is_gradient, deriv_nnet_);
@@ -80,46 +85,65 @@ void NnetXvectorComputeProb::ProcessOutputs(NnetComputer *computer) {
     if (nnet_.IsOutputNode(node_index)) {
       std::string xvector_name = nnet_.GetNodeName(node_index), s_name = "s",
           b_name = "b";
-      if (nnet_.GetNodeIndex(s_name) == -1 || nnet_.GetNodeIndex(b_name) == -1)
-        KALDI_ERR << "The nnet expected to have two output nodes with name s and b.";
+      if (nnet_.GetNodeIndex(s_name) == -1
+          || nnet_.GetNodeIndex(b_name) == -1)
+        KALDI_ERR << "Expected the nnet to have two output nodes "
+                  << "named 's' and 'b'.";
       if (xvector_name != s_name && xvector_name != b_name) {
-        const CuMatrixBase<BaseFloat> &xvector_pairs = computer->GetOutput(xvector_name),
-            &xvec_s = computer->GetOutput(s_name),
-            &xvec_b = computer->GetOutput(b_name);
-        CuMatrix<BaseFloat> xvector_deriv(xvector_pairs.NumRows(), xvector_pairs.NumCols(),
-            kUndefined);
-        int32 s_dim = xvector_pairs.NumCols() * (xvector_pairs.NumCols() + 1) / 2;
+        const CuMatrixBase<BaseFloat>
+            &xvector_pairs = computer->GetOutput(xvector_name),
+            &xvec_s = computer->GetOutput(s_name),
+            &xvec_b = computer->GetOutput(b_name);
+        int32 num_rows = xvector_pairs.NumRows(),
+            dim_xvector = xvector_pairs.NumCols();
+        int32 s_dim = dim_xvector * (dim_xvector + 1) / 2;
+
+        CuMatrix<BaseFloat> xvector_deriv(num_rows, dim_xvector, kUndefined),
+            raw_scores(num_rows, num_rows, kUndefined);
 
         // convert CuVector to CuSpMatrix
-        CuSpMatrix<BaseFloat> xvec_s_sp(xvector_pairs.NumCols());
+        CuSpMatrix<BaseFloat> xvec_s_sp(dim_xvector);
         xvec_s_sp.CopyFromVec(xvec_s.Row(0));
         CuVector<BaseFloat> deriv_s(s_dim);
         BaseFloat xvec_b_val = xvec_b(0,0), deriv_b;
         BaseFloat tot_weight, tot_objf;
         bool supply_deriv = config_.compute_deriv;
+        bool compute_accuracy = config_.compute_accuracy;
         ComputeXvectorObjfAndDeriv(xvector_pairs, xvec_s_sp, xvec_b_val,
                                    (supply_deriv ? &xvector_deriv : NULL),
                                    (supply_deriv ? &deriv_s : NULL),
                                    (supply_deriv ? &deriv_b : NULL),
+                                   (compute_accuracy ? &raw_scores : NULL),
                                    &tot_objf, &tot_weight);
 
         if (supply_deriv) {
           CuMatrix<BaseFloat> deriv_s_mat(1, s_dim),
-              deriv_b_mat(1,1);
+            deriv_b_mat(1,1);
           deriv_b_mat(0,0) = deriv_b;
           deriv_s_mat.CopyRowsFromVec(deriv_s);
           computer->AcceptOutputDeriv(xvector_name, &xvector_deriv);
           computer->AcceptOutputDeriv(s_name, &deriv_s_mat);
           computer->AcceptOutputDeriv(b_name, &deriv_b_mat);
         }
+
         SimpleObjectiveInfo &totals = objf_info_[xvector_name];
         totals.tot_weight += tot_weight;
         totals.tot_objective += tot_objf;
+
+        if (compute_accuracy) {
+          BaseFloat tot_acc, tot_weight_acc;
+          SimpleObjectiveInfo &acc_totals = acc_info_[xvector_name];
+          ComputeAccuracy(raw_scores, &tot_weight_acc, &tot_acc);
+          acc_totals.tot_objective += tot_weight_acc * tot_acc;
+          acc_totals.tot_weight += tot_weight_acc;
+        }
+        num_minibatches_processed_++;
       }
-      num_minibatches_processed_++;
     }
   }
 }
@@ -140,15 +164,70 @@ bool NnetXvectorComputeProb::PrintTotalStats() const {
       KALDI_LOG << "Overall "
                 << (obj_type == kLinear ? "log-likelihood" : "objective")
                 << " for '" << name << "' is "
-                << (info.tot_objective / info.tot_weight) << " per frame"
-                << ", over " << info.tot_weight << " frames.";
+                << (info.tot_objective / info.tot_weight) << " per chunk"
+                << ", over " << info.tot_weight << " chunks.";
       if (info.tot_weight > 0)
         ans = true;
     }
   }
+  if (config_.compute_accuracy) {  // Now print the accuracy.
+    iter = acc_info_.begin();
+    end = acc_info_.end();
+    for (; iter != end; ++iter) {
+      const std::string &name = iter->first;
+      const SimpleObjectiveInfo &info = iter->second;
+      KALDI_LOG << "Overall accuracy for '" << name << "' is "
+                << (info.tot_objective / info.tot_weight)
+                << " per pair of chunks"
+                << ", over " << info.tot_weight << " pairs of chunks.";
+    }
+  }
   return ans;
 }
 
+void NnetXvectorComputeProb::ComputeAccuracy(
+    const CuMatrixBase<BaseFloat> &raw_scores,
+    BaseFloat *tot_weight_out,
+    BaseFloat *tot_accuracy_out) {
+  int32 num_rows = raw_scores.NumRows();
+  // The accuracy uses the EER threshold, which is computed on the
+  // first minibatch and reused for subsequent minibatches.
+  if (need_eer_threshold_) {
+    std::vector<BaseFloat> target_scores;
+    std::vector<BaseFloat> nontarget_scores;
+    for (int32 i = 0; i < num_rows; i++) {
+      for (int32 j = 0; j < num_rows; j++) {
+        if (i + 1 == j && i % 2 == 0) {
+          target_scores.push_back(raw_scores(i, j));
+        } else if (i < j) {
+          nontarget_scores.push_back(raw_scores(i, j));
+        }
+      }
+    }
+    (*tot_accuracy_out) = 1.0 - ComputeEer(&target_scores, &nontarget_scores);
+    (*tot_weight_out) = target_scores.size() + nontarget_scores.size();
+    need_eer_threshold_ = false;
+  } else {
+    int32 count = 0,
+        error = 0;
+    for (int32 i = 0; i < num_rows; i++) {
+      for (int32 j = 0; j < num_rows; j++) {
+        if (i + 1 == j && i % 2 == 0) {
+          if (raw_scores(i, j) < eer_threshold_)
+            error++;
+          count++;
+        } else if (i < j) {
+          if (raw_scores(i, j) >= eer_threshold_)
+            error++;
+          count++;
+        }
+      }
+    }
+    (*tot_accuracy_out) = 1.0 - static_cast<BaseFloat>(error) / count;
+    (*tot_weight_out) = count;
+  }
+}
+
 const SimpleObjectiveInfo* NnetXvectorComputeProb::GetObjective(
     const std::string &output_name) const {
   unordered_map<std::string, SimpleObjectiveInfo, StringHasher>::const_iterator
@@ -159,5 +238,28 @@ const SimpleObjectiveInfo* NnetXvectorComputeProb::GetObjective(
   return NULL;
 }
 
+BaseFloat NnetXvectorComputeProb::ComputeEer(
+    std::vector<BaseFloat> *target_scores,
+    std::vector<BaseFloat> *nontarget_scores) {
+  KALDI_ASSERT(!target_scores->empty() && !nontarget_scores->empty());
+  std::sort(target_scores->begin(), target_scores->end());
+  std::sort(nontarget_scores->begin(), nontarget_scores->end());
+  int32 target_position = 0,
+      target_size = target_scores->size();
+  for (; target_position + 1 < target_size; target_position++) {
+    int32 nontarget_size = nontarget_scores->size(),
+        nontarget_n = nontarget_size * target_position * 1.0 / target_size,
+        nontarget_position = nontarget_size - 1 - nontarget_n;
+    if (nontarget_position < 0)
+      nontarget_position = 0;
+    if ((*nontarget_scores)[nontarget_position] <
+        (*target_scores)[target_position])
+      break;
+  }
+  eer_threshold_ = (*target_scores)[target_position];
+  BaseFloat eer = target_position * 1.0 / target_size;
+  return eer;
+}
+
 } // namespace nnet3
 } // namespace kaldi
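Note on the pair convention used above: ComputeAccuracy treats the score at (i, i+1) for even i as a target pair and every other upper-triangle entry (i, j), i < j, as a nontarget pair; this presumes the egs place the two chunks of each same-source pair in adjacent rows. The standalone C++ sketch below (illustrative only, not part of the patch) runs the same double loop and counts both kinds of pairs:

#include <cstdio>

int main() {
  const int num_rows = 6;  // three same-source pairs of chunks
  int num_target = 0, num_nontarget = 0;
  for (int i = 0; i < num_rows; i++) {
    for (int j = 0; j < num_rows; j++) {
      if (i + 1 == j && i % 2 == 0)
        num_target++;        // (0,1), (2,3), (4,5)
      else if (i < j)
        num_nontarget++;     // the remaining upper-triangle pairs
    }
  }
  // For N rows this gives N/2 targets and N*(N-1)/2 - N/2 nontargets.
  std::printf("targets = %d, nontargets = %d\n", num_target, num_nontarget);
  return 0;
}

For num_rows = 6 this prints "targets = 3, nontargets = 12", which matches the total weight that ComputeAccuracy accumulates into acc_info_.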
diff --git a/src/xvector/nnet-xvector-diagnostics.h b/src/xvector/nnet-xvector-diagnostics.h
index 046088518b1..2b274efa784 100644
--- a/src/xvector/nnet-xvector-diagnostics.h
+++ b/src/xvector/nnet-xvector-diagnostics.h
@@ -71,6 +71,13 @@ class NnetXvectorComputeProb {
   ~NnetXvectorComputeProb();
  private:
   void ProcessOutputs(NnetComputer *computer);
+  // Returns the Equal Error Rate (EER) and sets eer_threshold_.
+  BaseFloat ComputeEer(std::vector<BaseFloat> *target_scores,
+                       std::vector<BaseFloat> *nontarget_scores);
+  // Computes the accuracy for this minibatch, using eer_threshold_.
+  void ComputeAccuracy(const CuMatrixBase<BaseFloat> &raw_scores,
+                       BaseFloat *tot_weight_out,
+                       BaseFloat *tot_accuracy_out);
 
   NnetComputeProbOptions config_;
   const Nnet &nnet_;
@@ -80,12 +87,12 @@ class NnetXvectorComputeProb {
   // this is only for diagnostics.
   int32 num_minibatches_processed_;
-
+  bool need_eer_threshold_;
+  BaseFloat eer_threshold_;
   unordered_map<std::string, SimpleObjectiveInfo, StringHasher> objf_info_;
-
+  unordered_map<std::string, SimpleObjectiveInfo, StringHasher> acc_info_;
 };
-
 } // namespace nnet3
 } // namespace kaldi
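Note on ComputeEer, declared above: it sorts both score lists in ascending order and advances a candidate threshold through the target scores until the fraction of targets below the threshold roughly matches the fraction of nontargets at or above it; that crossing point is the EER, and the threshold is cached in eer_threshold_. A self-contained sketch of the same search on made-up scores (illustrative only; the real implementation is in nnet-xvector-diagnostics.cc above):

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  // Made-up scores; higher means "more likely a target pair".
  std::vector<float> target = {0.9f, 1.5f, 2.0f, 2.5f};
  std::vector<float> nontarget = {-1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 1.2f};
  std::sort(target.begin(), target.end());
  std::sort(nontarget.begin(), nontarget.end());
  int tsize = target.size(), nsize = nontarget.size(), pos = 0;
  for (; pos + 1 < tsize; pos++) {
    // Nontarget index whose upper-tail fraction matches pos / tsize.
    int n = static_cast<int>(nsize * pos * 1.0 / tsize);
    int npos = nsize - 1 - n;
    if (npos < 0)
      npos = 0;
    if (nontarget[npos] < target[pos])
      break;
  }
  std::printf("threshold = %.2f, EER ~ %.2f\n",
              target[pos], pos * 1.0 / tsize);
  return 0;
}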
diff --git a/src/xvector/nnet-xvector-training.cc b/src/xvector/nnet-xvector-training.cc
index 7327af90d45..f7fb430ad2c 100644
--- a/src/xvector/nnet-xvector-training.cc
+++ b/src/xvector/nnet-xvector-training.cc
@@ -30,13 +30,13 @@ NnetXvectorTrainer::NnetXvectorTrainer(const NnetTrainerOptions &config,
     nnet_(nnet),
     compiler_(*nnet, config_.optimize_config),
     num_minibatches_processed_(0) {
-  if (config.zero_component_stats)
+  if (config_.zero_component_stats)
     ZeroComponentStats(nnet);
-  if (config.momentum == 0.0 && config.max_param_change == 0.0) {
+  if (config_.momentum == 0.0 && config_.max_param_change == 0.0) {
     delta_nnet_= NULL;
   } else {
-    KALDI_ASSERT(config.momentum >= 0.0 &&
-                 config.max_param_change >= 0.0);
+    KALDI_ASSERT(config_.momentum >= 0.0 &&
+                 config_.max_param_change >= 0.0);
     delta_nnet_ = nnet_->Copy();
     bool is_gradient = false;  // setting this to true would disable the
                                // natural-gradient updates.
@@ -130,6 +130,7 @@ void NnetXvectorTrainer::ProcessOutputs(NnetComputer *computer) {
                                  (supply_deriv ? &xvector_deriv : NULL),
                                  (supply_deriv ? &deriv_s : NULL),
                                  (supply_deriv ? &deriv_b : NULL),
+                                 NULL,  // raw pair scores are not needed in training.
                                  &tot_objf,
                                  &tot_weight);
@@ -246,7 +247,7 @@ void GetComputationRequestXvector(const Nnet &nnet,
   request->need_model_derivative = need_model_derivative;
   request->store_component_stats = store_component_stats;
-  // xvector-egs have multiple inputs(e.g. different inputs correspond
-  // to different chunks and no outputs.
+  // xvector-egs have multiple inputs (e.g. different inputs correspond
+  // to different chunks) and no outputs.
   for (size_t i = 0; i < eg.io.size(); i++) {
     const NnetIo &io = eg.io[i];
@@ -263,28 +264,36 @@ void GetComputationRequestXvector(const Nnet &nnet,
       IoSpecification &io_spec = dest.back();
       io_spec.name = name;
       io_spec.indexes = io.indexes;
-      io_spec.has_deriv = nnet.IsOutputNode(node_index) && need_model_derivative;
+      io_spec.has_deriv = false;
   }
 
   // We only need the output on frame t=0 for each n.
   int32 io_index_size = request->inputs[0].indexes.size(),
-      n_indx_size = 0;
-  std::vector<Index> output_indexes,
+      n_indx_size = 1e6, t_ind;
+  std::vector<Index> output_indexes,
       affine_output_indexes;
   affine_output_indexes.resize(1);
   affine_output_indexes[0].n = 0;
   affine_output_indexes[0].t = 0;
-  for (int32 indx = 0; indx < io_index_size; indx++)
-    if (request->inputs[0].indexes[indx].t == 0)
-      n_indx_size++;
+  std::map<int32, int32> n_indx_sizes;
+  for (int32 indx = 0; indx < io_index_size; indx++) {
+    t_ind = request->inputs[0].indexes[indx].t;
+    if (n_indx_sizes.count(t_ind) != 0)
+      n_indx_sizes[t_ind] += 1;
+    else
+      n_indx_sizes.insert(std::make_pair(t_ind, 1));
+  }
+  // Use the smallest per-offset count, so that the output request never
+  // asks for more distinct n values than some frame-offset provides.
+  std::map<int32, int32>::const_iterator iter;
+  for (iter = n_indx_sizes.begin(); iter != n_indx_sizes.end(); iter++)
+    n_indx_size = std::min(n_indx_size, iter->second);
   output_indexes.resize(n_indx_size);
   for (int32 indx = 0; indx < n_indx_size; indx++) {
     output_indexes[indx].n = indx;
     output_indexes[indx].t = 0;
   }
-
+
   // In order to generate computation request for output nodes,
   // we should find output nodes and add io_spec for each one.
   int32 num_nodes = nnet.NumNodes();
@@ -294,8 +303,8 @@ void GetComputationRequestXvector(const Nnet &nnet,
       dest.resize(dest.size() + 1);
       IoSpecification &io_spec = dest.back();
       io_spec.name = nnet.GetNodeName(node_index);
-      if (nnet.GetNodeName(node_index) == "s" ||
-          nnet.GetNodeName(node_index) == "b")
+      if (nnet.GetNodeName(node_index) == "s" ||
+          nnet.GetNodeName(node_index) == "b")
        io_spec.indexes = affine_output_indexes;
       else
        io_spec.indexes = output_indexes;
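Note on the index-counting change in GetComputationRequestXvector above: the old code sized the output request by counting only input indexes with t == 0, while the new code counts the entries for every frame offset t and takes the minimum count over offsets, which appears intended to keep the request valid for egs whose chunks are present at several frame shifts. A toy version of that counting (illustrative only; the data is made up):

#include <algorithm>
#include <cstdio>
#include <map>
#include <vector>

int main() {
  // Hypothetical t values of the input indexes: two frame shifts,
  // t=0 appears for 4 chunks, t=1 for only 3 chunks.
  std::vector<int> t_values = {0, 0, 0, 0, 1, 1, 1};
  std::map<int, int> counts;
  for (size_t i = 0; i < t_values.size(); i++)
    counts[t_values[i]]++;  // operator[] value-initializes missing keys to 0
  int n_indx_size = 1000000;
  for (std::map<int, int>::const_iterator it = counts.begin();
       it != counts.end(); ++it)
    n_indx_size = std::min(n_indx_size, it->second);
  std::printf("n_indx_size = %d\n", n_indx_size);  // prints 3
  return 0;
}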
diff --git a/src/xvector/xvector.cc b/src/xvector/xvector.cc
index 604d70e9c14..06e9055acf9 100644
--- a/src/xvector/xvector.cc
+++ b/src/xvector/xvector.cc
@@ -26,6 +26,7 @@ void ComputeXvectorObjfAndDeriv(
     const CuSpMatrix<BaseFloat> &S, BaseFloat b,
     CuMatrixBase<BaseFloat> *deriv_xvector,
     CuVector<BaseFloat> *deriv_S,
     BaseFloat *deriv_b,
+    CuMatrixBase<BaseFloat> *raw_scores,
     BaseFloat *tot_objf,
     BaseFloat *tot_weight) {
@@ -40,6 +41,8 @@ void ComputeXvectorObjfAndDeriv(
     KALDI_ASSERT(deriv_xvector->NumCols() == xvector_dim);
     KALDI_ASSERT(deriv_xvector->NumRows() == N);
     KALDI_ASSERT(deriv_S->Dim() == S_dim);
+    deriv_xvector->SetZero();
+    deriv_S->SetZero();
   }
 
   CuMatrix<BaseFloat> S_tmp(S),
@@ -61,6 +64,8 @@
   scores.AddMat(-1.0, R, kTrans);
   scores.AddMat(-1.0, R, kNoTrans);
   scores.Add(b);
+  if (raw_scores != NULL)
+    raw_scores->CopyFromMat(scores);
   cu::ComputeXvectorObjfFromScores(scores, &objf_terms, &objf_deriv_terms);
   CuVector<BaseFloat> objf_terms_vec(N);
diff --git a/src/xvector/xvector.h b/src/xvector/xvector.h
index 75083533acd..9ddc2d674fd 100644
--- a/src/xvector/xvector.h
+++ b/src/xvector/xvector.h
@@ -69,6 +69,7 @@ namespace kaldi {
     CuMatrixBase<BaseFloat> *deriv_xvector,
     CuVector<BaseFloat> *deriv_S,
     BaseFloat *deriv_b,
+    CuMatrixBase<BaseFloat> *raw_scores,
     BaseFloat *tot_objf,
     BaseFloat *tot_weight);
 } // namespace kaldi
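Note on raw_scores, the parameter added to ComputeXvectorObjfAndDeriv above: when non-NULL it receives a copy of the N x N pairwise score matrix just before the objective is evaluated, and this is what the diagnostics use for the EER and accuracy computations. The fragment shows only the last steps of building the scores (subtracting R and its transpose, then adding b), so the following is a hedged reading rather than a transcription of the patch: assuming the unshown lines form the Gram matrix of the xvectors and row i of R holds the quadratic term x_i^T S x_i, each entry has the form s(x_i, x_j) = x_i . x_j - x_i^T S x_i - x_j^T S x_j + b. A toy computation of one such score (all values made up):

#include <cstdio>

// q(x) = x^T S x for a symmetric 2x2 matrix S.
static float Quad(const float S[2][2], const float x[2]) {
  return S[0][0]*x[0]*x[0] + 2.0f*S[0][1]*x[0]*x[1] + S[1][1]*x[1]*x[1];
}

int main() {
  // Two toy 2-dim xvectors, a symmetric S, and an offset b.
  const float x0[2] = {1.0f, 0.5f}, x1[2] = {0.8f, 0.6f};
  const float S[2][2] = {{0.1f, 0.02f}, {0.02f, 0.1f}};
  const float b = 0.2f;
  float dot = x0[0]*x1[0] + x0[1]*x1[1];
  float score = dot - Quad(S, x0) - Quad(S, x1) + b;
  std::printf("s(x0, x1) = %.4f\n", score);
  return 0;
}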