Skip to content
Open
9 changes: 5 additions & 4 deletions egs/swbd/s5c/local/xvector/prepare_perturbed_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ stage=1
train_stage=-10
generate_alignments=true # false if doing ctc training
speed_perturb=true
mfcc_config=conf/mfcc_hires.conf
mfccdir=mfcc

. ./path.sh
. ./utils/parse_options.sh
Expand All @@ -27,13 +29,12 @@ if [ $stage -le 1 ]; then
if [ -f data/${datadir}_sp_hires/feats.scp ]; then
echo "$0: directory data/${datadir}_sp_hires/feats.scp already exists, skipping creating it."
else
mfccdir=mfcc
utils/copy_data_dir.sh data/${datadir}_sp data/${datadir}_sp_hires
steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 \
steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 --mfcc-config $mfcc_config \
data/${datadir}_sp_hires exp/make_mfcc/${datadir}_sp_hires $mfccdir || exit 1;
# we typically won't need the cmvn stats when using hires features-- it's
# mostly for neural nets.
utils/fix_data_dir.sh data/${dataset}_sp_hires # remove segments with problems
utils/fix_data_dir.sh data/${datadir}_sp_hires # remove segments with problems
fi
done
fi
Expand All @@ -50,7 +51,7 @@ if [ $stage -le 2 ]; then
echo "$0: data/${dataset}_hires/feats.scp already exists, skipping mfcc generation"
else
utils/copy_data_dir.sh data/$dataset data/${dataset}_hires
steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 --mfcc-config conf/mfcc_hires.conf \
steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 --mfcc-config $mfcc_config \
data/${dataset}_hires exp/make_hires/$dataset $mfccdir;
steps/compute_cmvn_stats.sh data/${dataset}_hires exp/make_hires/$dataset $mfccdir;
utils/fix_data_dir.sh data/${dataset}_hires # remove segments with problems
Expand Down
19 changes: 13 additions & 6 deletions egs/swbd/s5c/local/xvector/train.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,21 @@
set -e

stage=1
train_stage=1
train_stage=-10
generate_alignments=true # false if doing ctc training
speed_perturb=true

init_lr=0.003
final_lr=0.0003
max_change=2.0
use_gpu=true
feat_dim=40 # this is the MFCC dim we use in the hires features. you can't change it
# unless you change local/xvector/prepare_perturbed_data.sh to use a different
# MFCC config with a different dimension.
data=data/train_nodup_sp_hires # you can't change this without changing
# local/xvector/prepare_perturbed_data.sh
xvector_dim=200 # dimension of the xVector. configurable.
xvector_dir=exp/xvector_a
egs_dir=exp/xvector_a/egs


. ./path.sh
Expand All @@ -40,18 +44,21 @@ if [ $stage -le 3 ]; then
$xvector_dir/nnet.config
fi

if [ $stage -le 4 ]; then
if [ $stage -le 4 ] && [ -z "$egs_dir" ]; then
# dump egs.
steps/nnet3/xvector/get_egs.sh --cmd "$train_cmd" \
"$data" $xvector_dir/egs
"$data" $egs_dir
fi

if [ $stage -le 5 ]; then
# training for 4 epochs * 3 shifts means we see each eg 12
# times (3 different frame-shifts of the same eg are counted as different).
steps/nnet3/xvector/train.sh --cmd "$train_cmd" \
--num-epochs 4 --num-shifts 3 \
--num-jobs-initial 2 --num-jobs-final 8 \
--num-epochs 4 --num-shifts 3 --use-gpu $use_gpu --stage $train_stage \
--initial-effective-lrate $init_lr --final-effective-lrate $final_lr \
--num-jobs-initial 1 --num-jobs-final 8 \
--max-param-change $max_change \
--egs-dir $egs_dir \
$xvector_dir
fi

Expand Down
7 changes: 3 additions & 4 deletions egs/wsj/s5/steps/nnet3/xvector/train.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
cmd=run.pl
num_epochs=4 # Number of epochs of training;
# the number of iterations is worked out from this.
diss_scale=1.0 # scale value used to scale the dissimalarity part in objective function.
num_shifts=3
initial_effective_lrate=0.003
final_effective_lrate=0.0003
Expand Down Expand Up @@ -134,10 +133,10 @@ while [ $x -lt $num_iters ]; do
# Set off jobs doing some diagnostics, in the background.
# Use the egs dir from the previous iteration for the diagnostics
$cmd JOB=1:$num_diagnostic_archives $dir/log/compute_prob_valid.$x.JOB.log \
nnet3-xvector-compute-prob $dir/$x.raw \
nnet3-xvector-compute-prob --compute-accuracy=true $dir/$x.raw \
"ark:nnet3-merge-egs --measure-output-frames=false ark:$egs_dir/valid_diagnostic_egs.JOB.ark ark:- |" &
$cmd JOB=1:$num_diagnostic_archives $dir/log/compute_prob_train.$x.JOB.log \
nnet3-xvector-compute-prob $dir/$x.raw \
nnet3-xvector-compute-prob --compute-accuracy=true $dir/$x.raw \
"ark:nnet3-merge-egs --measure-output-frames=false ark:$egs_dir/train_diagnostic_egs.JOB.ark ark:- |" &

if [ $x -gt 0 ]; then
Expand Down Expand Up @@ -175,7 +174,7 @@ while [ $x -lt $num_iters ]; do

$cmd $train_queue_opt $dir/log/train.$x.$n.log \
nnet3-xvector-train $parallel_train_opts --print-interval=10 \
--max-param-change=$max_param_change --diss-scale=$diss_scale "$raw" \
--max-param-change=$max_param_change "$raw" \
"ark:nnet3-copy-egs ark:$egs_dir/egs.$archive.ark ark:- | nnet3-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x ark:- ark:-| nnet3-merge-egs --measure-output-frames=false --minibatch-size=$minibatch_size --discard-partial-minibatches=true ark:- ark:- |" \
$dir/$[$x+1].$n.raw || touch $dir/.error &
done
Expand Down
130 changes: 116 additions & 14 deletions src/xvector/nnet-xvector-diagnostics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ NnetXvectorComputeProb::NnetXvectorComputeProb(const NnetComputeProbOptions &con
bool is_gradient = true; // force simple update
SetZero(is_gradient, deriv_nnet_);
}
if (config_.compute_accuracy)
need_eer_threshold_ = true;
else
need_eer_threshold_ = false;
}

const Nnet &NnetXvectorComputeProb::GetDeriv() const {
Expand All @@ -51,6 +55,7 @@ NnetXvectorComputeProb::~NnetXvectorComputeProb() {
void NnetXvectorComputeProb::Reset() {
num_minibatches_processed_ = 0;
objf_info_.clear();
acc_info_.clear();
if (deriv_nnet_) {
bool is_gradient = true;
SetZero(is_gradient, deriv_nnet_);
Expand Down Expand Up @@ -80,46 +85,65 @@ void NnetXvectorComputeProb::ProcessOutputs(NnetComputer *computer) {
if (nnet_.IsOutputNode(node_index)) {
std::string xvector_name = nnet_.GetNodeName(node_index),
s_name = "s", b_name = "b";
if (nnet_.GetNodeIndex(s_name) == -1 || nnet_.GetNodeIndex(b_name) == -1)
KALDI_ERR << "The nnet expected to have two output nodes with name s and b.";
if (nnet_.GetNodeIndex(s_name) == -1
|| nnet_.GetNodeIndex(b_name) == -1)
KALDI_ERR << "Expected the nnet to have two output nodes with name "
<< "s and b.";

if (xvector_name != s_name && xvector_name != b_name) {
const CuMatrixBase<BaseFloat> &xvector_pairs = computer->GetOutput(xvector_name),
&xvec_s = computer->GetOutput(s_name),
&xvec_b = computer->GetOutput(b_name);
CuMatrix<BaseFloat> xvector_deriv(xvector_pairs.NumRows(), xvector_pairs.NumCols(),
kUndefined);
int32 s_dim = xvector_pairs.NumCols() * (xvector_pairs.NumCols() + 1) / 2;
const CuMatrixBase<BaseFloat> &xvector_pairs = computer->GetOutput(
xvector_name),
&xvec_s = computer->GetOutput(
s_name),
&xvec_b = computer->GetOutput(
b_name);
int32 num_rows = xvector_pairs.NumRows(),
dim_xvector = xvector_pairs.NumCols();
int32 s_dim = dim_xvector * (dim_xvector + 1) / 2;

CuMatrix<BaseFloat> xvector_deriv(num_rows, dim_xvector,
kUndefined),
raw_scores(num_rows, num_rows, kUndefined);

// convert CuVector to CuSpMatrix
CuSpMatrix<BaseFloat> xvec_s_sp(xvector_pairs.NumCols());
CuSpMatrix<BaseFloat> xvec_s_sp(dim_xvector);
xvec_s_sp.CopyFromVec(xvec_s.Row(0));

CuVector<BaseFloat> deriv_s(s_dim);
BaseFloat xvec_b_val = xvec_b(0,0), deriv_b;
BaseFloat tot_weight, tot_objf;
bool supply_deriv = config_.compute_deriv;
bool compute_accuracy = config_.compute_accuracy;
ComputeXvectorObjfAndDeriv(xvector_pairs, xvec_s_sp, xvec_b_val,
(supply_deriv ? &xvector_deriv : NULL),
(supply_deriv ? &deriv_s : NULL),
(supply_deriv ? &deriv_b : NULL),
(compute_accuracy ? &raw_scores : NULL),
&tot_objf,
&tot_weight);
if (supply_deriv) {
CuMatrix<BaseFloat> deriv_s_mat(1, s_dim),
deriv_b_mat(1,1);
deriv_b_mat(1,1);
deriv_b_mat(0,0) = deriv_b;
deriv_s_mat.CopyRowsFromVec(deriv_s);
computer->AcceptOutputDeriv(xvector_name, &xvector_deriv);
computer->AcceptOutputDeriv(s_name, &deriv_s_mat);
computer->AcceptOutputDeriv(b_name, &deriv_b_mat);

}

SimpleObjectiveInfo &totals = objf_info_[xvector_name];
totals.tot_weight += tot_weight;
totals.tot_objective += tot_objf;

if (compute_accuracy) {
BaseFloat tot_acc, tot_weight_acc;
SimpleObjectiveInfo &acc_totals = acc_info_[xvector_name];
ComputeAccuracy(raw_scores, &tot_weight_acc, &tot_acc);
acc_totals.tot_objective += tot_weight_acc * tot_acc;
acc_totals.tot_weight += tot_weight_acc;
}
num_minibatches_processed_++;
}
num_minibatches_processed_++;
}
}
}
Expand All @@ -140,15 +164,70 @@ bool NnetXvectorComputeProb::PrintTotalStats() const {
KALDI_LOG << "Overall "
<< (obj_type == kLinear ? "log-likelihood" : "objective")
<< " for '" << name << "' is "
<< (info.tot_objective / info.tot_weight) << " per frame"
<< ", over " << info.tot_weight << " frames.";
<< (info.tot_objective / info.tot_weight) << " per chunk"
<< ", over " << info.tot_weight << " chunks.";
if (info.tot_weight > 0)
ans = true;
}
}
if (config_.compute_accuracy) { // Now print the accuracy.
iter = acc_info_.begin();
end = acc_info_.end();
for (; iter != end; ++iter) {
const std::string &name = iter->first;
const SimpleObjectiveInfo &info = iter->second;
KALDI_LOG << "Overall accuracy for '" << name << "' is "
<< (info.tot_objective / info.tot_weight)
<< " per pair of chunks"
<< ", over " << info.tot_weight << " pairs of chunks.";
}
}
return ans;
}

// Computes the binary-classification accuracy over all chunk pairs in one
// minibatch, given the raw pairwise scores.  Pairs (i, i+1) with even i are
// treated as same-speaker (target) trials; all other pairs (i, j) with i < j
// are different-speaker (nontarget) trials.
// On the first minibatch (need_eer_threshold_ == true) the accuracy is
// defined as 1 - EER, and ComputeEer() also sets eer_threshold_ as a side
// effect; on subsequent minibatches that stored threshold is applied to
// classify each pair.
//   raw_scores       - pairwise similarity scores; assumed square
//                      (one row/col per chunk) -- NOTE(review): the local is
//                      named num_rows but reads NumCols(); equivalent only
//                      because callers pass a square matrix -- confirm.
//   tot_weight_out   - output: number of pairs scored.
//   tot_accuracy_out - output: fraction of pairs classified correctly.
void NnetXvectorComputeProb::ComputeAccuracy(
    const CuMatrixBase<BaseFloat> &raw_scores,
    BaseFloat *tot_weight_out,
    BaseFloat *tot_accuracy_out) {
  int32 num_rows = raw_scores.NumCols();
  // The accuracy uses the EER threshold, which is calculated
  // on the first minibatch.
  if (need_eer_threshold_) {
    // First minibatch: partition scores into target/nontarget lists and
    // derive the EER (and, inside ComputeEer, the decision threshold).
    std::vector<BaseFloat> target_scores;
    std::vector<BaseFloat> nontarget_scores;
    for (int32 i = 0; i < num_rows; i++) {
      for (int32 j = 0; j < num_rows; j++) {
        if (i + 1 == j && i % 2 == 0) {
          // Adjacent even/odd rows form a same-speaker pair.
          target_scores.push_back(raw_scores(i, j));
        } else if (i < j) {
          // Every other upper-triangle pair is a different-speaker trial.
          nontarget_scores.push_back(raw_scores(i, j));
        }
      }
    }
    (*tot_accuracy_out) = 1.0 - ComputeEer(&target_scores, &nontarget_scores);
    (*tot_weight_out) = target_scores.size() + nontarget_scores.size();
    need_eer_threshold_ = false;  // threshold is now fixed for later batches
  } else {
    // Later minibatches: count classification errors against the stored
    // threshold.  Target pairs err when scored below it, nontarget pairs
    // when scored at or above it.
    int32 count = 0,
        error = 0;
    for (int32 i = 0; i < num_rows; i++) {
      for (int32 j = 0; j < num_rows; j++) {
        if (i + 1 == j && i % 2 == 0) {
          if (raw_scores(i, j) < eer_threshold_)
            error++;
          count++;
        } else if (i < j) {
          if (raw_scores(i, j) >= eer_threshold_)
            error++;
          count++;
        }
      }
    }
    (*tot_accuracy_out) = 1.0 -  static_cast<BaseFloat>(error) / count;
    (*tot_weight_out) = count;
  }
}

const SimpleObjectiveInfo* NnetXvectorComputeProb::GetObjective(
const std::string &output_name) const {
unordered_map<std::string, SimpleObjectiveInfo, StringHasher>::const_iterator
Expand All @@ -159,5 +238,28 @@ const SimpleObjectiveInfo* NnetXvectorComputeProb::GetObjective(
return NULL;
}

// Returns the Equal Error Rate for the given trial scores and stores the
// corresponding decision threshold in eer_threshold_.
// Both input vectors are sorted in place (ascending).  Walking the sorted
// target scores from the bottom, we stop at the first position where the
// matched false-alarm quantile of the nontarget scores drops below the
// current target score; the target score at that position is the threshold
// and the fraction of targets below it is the EER.
BaseFloat NnetXvectorComputeProb::ComputeEer(
    std::vector<BaseFloat> *target_scores,
    std::vector<BaseFloat> *nontarget_scores) {
  KALDI_ASSERT(!target_scores->empty() && !nontarget_scores->empty());
  std::sort(target_scores->begin(), target_scores->end());
  std::sort(nontarget_scores->begin(), nontarget_scores->end());
  const int32 num_targets = target_scores->size(),
      num_nontargets = nontarget_scores->size();
  int32 pos = 0;
  while (pos + 1 < num_targets) {
    // Number of nontarget scores that would be false alarms at this miss
    // rate (same quantile, scaled to the nontarget list's size).
    int32 num_fa = num_nontargets * pos * 1.0 / num_targets;
    int32 idx = num_nontargets - 1 - num_fa;
    if (idx < 0)
      idx = 0;
    if ((*nontarget_scores)[idx] < (*target_scores)[pos])
      break;
    pos++;
  }
  eer_threshold_ = (*target_scores)[pos];  // side effect used by caller
  return pos * 1.0 / num_targets;
}

} // namespace nnet3
} // namespace kaldi
13 changes: 10 additions & 3 deletions src/xvector/nnet-xvector-diagnostics.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,13 @@ class NnetXvectorComputeProb {
~NnetXvectorComputeProb();
private:
void ProcessOutputs(NnetComputer *computer);
// Returns the Equal Error Rate (EER) and sets the threshold.
BaseFloat ComputeEer(std::vector<BaseFloat> *target_scores,
std::vector<BaseFloat> *nontarget_scores);
// Computes the accuracy for this minibatch.
void ComputeAccuracy(const CuMatrixBase<BaseFloat> &raw_scores,
BaseFloat *tot_weight_out,
BaseFloat *tot_accuracy_out);

NnetComputeProbOptions config_;
const Nnet &nnet_;
Expand All @@ -80,12 +87,12 @@ class NnetXvectorComputeProb {

// this is only for diagnostics.
int32 num_minibatches_processed_;

bool need_eer_threshold_;
BaseFloat eer_threshold_;
unordered_map<std::string, SimpleObjectiveInfo, StringHasher> objf_info_;

unordered_map<std::string, SimpleObjectiveInfo, StringHasher> acc_info_;
};


} // namespace nnet3
} // namespace kaldi

Expand Down
Loading