From 413826dfa156adb4d3b61cbb1fe685cef1af2eaa Mon Sep 17 00:00:00 2001 From: David Snyder Date: Tue, 23 Feb 2016 22:14:05 -0500 Subject: [PATCH 1/5] xvector: adding EER-based accuracy calculation to xvector diagnostics. --- egs/wsj/s5/steps/nnet3/xvector/train.sh | 8 +- src/xvector/nnet-xvector-diagnostics.cc | 130 +++++++++++++++++++++--- src/xvector/nnet-xvector-diagnostics.h | 11 +- src/xvector/nnet-xvector-training.cc | 9 +- src/xvector/xvector.cc | 3 + src/xvector/xvector.h | 1 + 6 files changed, 137 insertions(+), 25 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/xvector/train.sh b/egs/wsj/s5/steps/nnet3/xvector/train.sh index a05c62c5124..f3f218273c0 100755 --- a/egs/wsj/s5/steps/nnet3/xvector/train.sh +++ b/egs/wsj/s5/steps/nnet3/xvector/train.sh @@ -9,8 +9,8 @@ cmd=run.pl num_epochs=4 # Number of epochs of training; # the number of iterations is worked out from this. num_shifts=3 -initial_effective_lrate=0.0003 -final_effective_lrate=0.00003 +initial_effective_lrate=0.003 +final_effective_lrate=0.0003 num_jobs_initial=2 # Number of neural net jobs to run in parallel at the start of training num_jobs_final=8 # Number of neural net jobs to run in parallel at the end of training stage=-3 @@ -133,10 +133,10 @@ while [ $x -lt $num_iters ]; do # Set off jobs doing some diagnostics, in the background. # Use the egs dir from the previous iteration for the diagnostics $cmd JOB=1:$num_diagnostic_archives $dir/log/compute_prob_valid.$x.JOB.log \ - nnet3-xvector-compute-prob $dir/$x.raw \ + nnet3-xvector-compute-prob --compute-accuracy=true $dir/$x.raw \ "ark:nnet3-merge-egs --measure-output-frames=false ark:$egs_dir/valid_diagnostic_egs.JOB.ark ark:- |" & $cmd JOB=1:$num_diagnostic_archives $dir/log/compute_prob_train.$x.JOB.log \ - nnet3-xvector-compute-prob $dir/$x.raw \ + nnet3-xvector-compute-prob --compute-accuracy=true $dir/$x.raw \ "ark:nnet3-merge-egs --measure-output-frames=false ark:$egs_dir/train_diagnostic_egs.JOB.ark ark:- |" & if [ $x -gt 0 ]; then diff --git a/src/xvector/nnet-xvector-diagnostics.cc b/src/xvector/nnet-xvector-diagnostics.cc index 74f2ce2e6aa..5f60816b22e 100644 --- a/src/xvector/nnet-xvector-diagnostics.cc +++ b/src/xvector/nnet-xvector-diagnostics.cc @@ -36,6 +36,10 @@ NnetXvectorComputeProb::NnetXvectorComputeProb(const NnetComputeProbOptions &con bool is_gradient = true; // force simple update SetZero(is_gradient, deriv_nnet_); } + if (config_.compute_accuracy) + need_eer_threshold_ = true; + else + need_eer_threshold_ = false; } const Nnet &NnetXvectorComputeProb::GetDeriv() const { @@ -51,6 +55,7 @@ NnetXvectorComputeProb::~NnetXvectorComputeProb() { void NnetXvectorComputeProb::Reset() { num_minibatches_processed_ = 0; objf_info_.clear(); + acc_info_.clear(); if (deriv_nnet_) { bool is_gradient = true; SetZero(is_gradient, deriv_nnet_); @@ -80,46 +85,66 @@ void NnetXvectorComputeProb::ProcessOutputs(NnetComputer *computer) { if (nnet_.IsOutputNode(node_index)) { std::string xvector_name = nnet_.GetNodeName(node_index), s_name = "s", b_name = "b"; - if (nnet_.GetNodeIndex(s_name) == -1 || nnet_.GetNodeIndex(b_name) == -1) - KALDI_ERR << "The nnet expected to have two output nodes with name s and b."; + if (nnet_.GetNodeIndex(s_name) == -1 + || nnet_.GetNodeIndex(b_name) == -1) + KALDI_ERR << "Expected the nnet to have two output nodes with name " + << "s and b."; if (xvector_name != s_name && xvector_name != b_name) { - const CuMatrixBase &xvector_pairs = computer->GetOutput(xvector_name), - &xvec_s = computer->GetOutput(s_name), - &xvec_b = 
computer->GetOutput(b_name); - CuMatrix xvector_deriv(xvector_pairs.NumRows(), xvector_pairs.NumCols(), - kUndefined); - int32 s_dim = xvector_pairs.NumCols() * (xvector_pairs.NumCols() + 1) / 2; + const CuMatrixBase &xvector_pairs = computer->GetOutput( + xvector_name), + &xvec_s = computer->GetOutput( + s_name), + &xvec_b = computer->GetOutput( + b_name); + int32 num_rows = xvector_pairs.NumRows(), + dim_xvector = xvector_pairs.NumCols(); + int32 s_dim = dim_xvector * (dim_xvector + 1) / 2; + + CuMatrix xvector_deriv(num_rows, + dim_xvector, + kUndefined), + raw_scores(num_rows, num_rows); // convert CuVector to CuSpMatrix - CuSpMatrix xvec_s_sp(xvector_pairs.NumCols()); + CuSpMatrix xvec_s_sp(dim_xvector); xvec_s_sp.CopyFromVec(xvec_s.Row(0)); CuVector deriv_s(s_dim); BaseFloat xvec_b_val = xvec_b(0,0), deriv_b; BaseFloat tot_weight, tot_objf; bool supply_deriv = config_.compute_deriv; + bool compute_accuracy = config_.compute_accuracy; ComputeXvectorObjfAndDeriv(xvector_pairs, xvec_s_sp, xvec_b_val, (supply_deriv ? &xvector_deriv : NULL), (supply_deriv ? &deriv_s : NULL), (supply_deriv ? &deriv_b : NULL), + (compute_accuracy ? &raw_scores : NULL), &tot_objf, &tot_weight); if (supply_deriv) { CuMatrix deriv_s_mat(1, s_dim), - deriv_b_mat(1,1); + deriv_b_mat(1,1); deriv_b_mat(0,0) = deriv_b; deriv_s_mat.CopyRowsFromVec(deriv_s); computer->AcceptOutputDeriv(xvector_name, &xvector_deriv); computer->AcceptOutputDeriv(s_name, &deriv_s_mat); computer->AcceptOutputDeriv(b_name, &deriv_b_mat); - } + SimpleObjectiveInfo &totals = objf_info_[xvector_name]; totals.tot_weight += tot_weight; totals.tot_objective += tot_objf; + + if (compute_accuracy) { + BaseFloat tot_acc, tot_weight_acc; + SimpleObjectiveInfo &acc_totals = acc_info_[xvector_name]; + ComputeAccuracy(raw_scores, &tot_weight_acc, &tot_acc); + acc_totals.tot_objective += tot_weight_acc * tot_acc; + acc_totals.tot_weight += tot_weight_acc; + } + num_minibatches_processed_++; } - num_minibatches_processed_++; } } } @@ -140,15 +165,69 @@ bool NnetXvectorComputeProb::PrintTotalStats() const { KALDI_LOG << "Overall " << (obj_type == kLinear ? "log-likelihood" : "objective") << " for '" << name << "' is " - << (info.tot_objective / info.tot_weight) << " per frame" - << ", over " << info.tot_weight << " frames."; + << (info.tot_objective / info.tot_weight) << " per chunk" + << ", over " << info.tot_weight << " chunk."; if (info.tot_weight > 0) ans = true; } } + if (config_.compute_accuracy) { // now print equal error-rates. + iter = acc_info_.begin(); + end = acc_info_.end(); + for (; iter != end; ++iter) { + const std::string &name = iter->first; + const SimpleObjectiveInfo &info = iter->second; + KALDI_LOG << "Overall accuracy for '" << name << "' is " + << (info.tot_objective / info.tot_weight) << " per pair of chunks" + << ", over " << info.tot_weight << " pairs of chunks."; + // don't bother changing ans; the loop over the regular objective should + // already have set it to true if we got any data. 
+ } + } return ans; } +void NnetXvectorComputeProb::ComputeAccuracy( + const CuMatrixBase &raw_scores, + BaseFloat *tot_weight_out, + BaseFloat *tot_accuracy_out) { + int32 num_rows = raw_scores.NumCols(); + if (need_eer_threshold_) { + std::vector target_scores; + std::vector nontarget_scores; + for (int32 i = 0; i < num_rows; i++) { + for (int32 j = 0; j < num_rows; j++) { + if (i + 1 == j && i % 2 == 0) { + target_scores.push_back(raw_scores(i, j)); + } else if (i < j) { + nontarget_scores.push_back(raw_scores(i, j)); + } + } + } + (*tot_accuracy_out) = 1.0 - ComputeEer(&target_scores, &nontarget_scores); + (*tot_weight_out) = target_scores.size() + nontarget_scores.size(); + need_eer_threshold_ = false; + } else { + int32 count = 0, + error = 0; + for (int32 i = 0; i < num_rows; i++) { + for (int32 j = 0; j < num_rows; j++) { + if (i + 1 == j && i % 2 == 0) { + if (raw_scores(i, j) < eer_threshold_) + error++; + count++; + } else if (i < j) { + if (raw_scores(i, j) >= eer_threshold_) + error++; + count++; + } + } + } + (*tot_accuracy_out) = 1.0 - static_cast(error) / count; + (*tot_weight_out) = count; + } +} + const SimpleObjectiveInfo* NnetXvectorComputeProb::GetObjective( const std::string &output_name) const { unordered_map::const_iterator @@ -159,5 +238,28 @@ const SimpleObjectiveInfo* NnetXvectorComputeProb::GetObjective( return NULL; } +BaseFloat NnetXvectorComputeProb::ComputeEer(std::vector *target_scores, + std::vector *nontarget_scores) { + KALDI_ASSERT(!target_scores->empty() && !nontarget_scores->empty()); + std::sort(target_scores->begin(), target_scores->end()); + std::sort(nontarget_scores->begin(), nontarget_scores->end()); + + int32 target_position = 0, + target_size = target_scores->size(); + for (; target_position + 1 < target_size; target_position++) { + int32 nontarget_size = nontarget_scores->size(), + nontarget_n = nontarget_size * target_position * 1.0 / target_size, + nontarget_position = nontarget_size - 1 - nontarget_n; + if (nontarget_position < 0) + nontarget_position = 0; + if ((*nontarget_scores)[nontarget_position] < + (*target_scores)[target_position]) + break; + } + eer_threshold_ = (*target_scores)[target_position]; + BaseFloat eer = target_position * 1.0 / target_size; + return eer; +} + } // namespace nnet3 } // namespace kaldi diff --git a/src/xvector/nnet-xvector-diagnostics.h b/src/xvector/nnet-xvector-diagnostics.h index 046088518b1..7dff86270f4 100644 --- a/src/xvector/nnet-xvector-diagnostics.h +++ b/src/xvector/nnet-xvector-diagnostics.h @@ -71,6 +71,11 @@ class NnetXvectorComputeProb { ~NnetXvectorComputeProb(); private: void ProcessOutputs(NnetComputer *computer); + BaseFloat ComputeEer(std::vector *target_scores, + std::vector *nontarget_scores); + void ComputeAccuracy(const CuMatrixBase &raw_scores, + BaseFloat *tot_weight_out, + BaseFloat *tot_accuracy_out); NnetComputeProbOptions config_; const Nnet &nnet_; @@ -80,12 +85,12 @@ class NnetXvectorComputeProb { // this is only for diagnostics. int32 num_minibatches_processed_; - + bool need_eer_threshold_; + BaseFloat eer_threshold_; unordered_map objf_info_; - + unordered_map acc_info_; }; - } // namespace nnet3 } // namespace kaldi diff --git a/src/xvector/nnet-xvector-training.cc b/src/xvector/nnet-xvector-training.cc index 7327af90d45..dc512937882 100644 --- a/src/xvector/nnet-xvector-training.cc +++ b/src/xvector/nnet-xvector-training.cc @@ -130,6 +130,7 @@ void NnetXvectorTrainer::ProcessOutputs(NnetComputer *computer) { (supply_deriv ? &xvector_deriv : NULL), (supply_deriv ? 
&deriv_s : NULL), (supply_deriv ? &deriv_b : NULL), + NULL, &tot_objf, &tot_weight); @@ -269,7 +270,7 @@ void GetComputationRequestXvector(const Nnet &nnet, // We only need the output on frame t=0 for each n. int32 io_index_size = request->inputs[0].indexes.size(), n_indx_size = 0; - std::vector output_indexes, + std::vector output_indexes, affine_output_indexes; affine_output_indexes.resize(1); affine_output_indexes[0].n = 0; @@ -284,7 +285,7 @@ void GetComputationRequestXvector(const Nnet &nnet, output_indexes[indx].n = indx; output_indexes[indx].t = 0; } - + // In order to generate computation request for output nodes, // we should find output nodes and add io_spec for each one. int32 num_nodes = nnet.NumNodes(); @@ -294,8 +295,8 @@ void GetComputationRequestXvector(const Nnet &nnet, dest.resize(dest.size() + 1); IoSpecification &io_spec = dest.back(); io_spec.name = nnet.GetNodeName(node_index); - if (nnet.GetNodeName(node_index) == "s" || - nnet.GetNodeName(node_index) == "b") + if (nnet.GetNodeName(node_index) == "s" || + nnet.GetNodeName(node_index) == "b") io_spec.indexes = affine_output_indexes; else io_spec.indexes = output_indexes; diff --git a/src/xvector/xvector.cc b/src/xvector/xvector.cc index 604d70e9c14..f7a734e16b3 100644 --- a/src/xvector/xvector.cc +++ b/src/xvector/xvector.cc @@ -26,6 +26,7 @@ void ComputeXvectorObjfAndDeriv( const CuSpMatrix &S, BaseFloat b, CuMatrixBase *deriv_xvector, CuVector *deriv_S, BaseFloat *deriv_b, + CuMatrixBase *raw_scores, BaseFloat *tot_objf, BaseFloat *tot_weight) { @@ -61,6 +62,8 @@ void ComputeXvectorObjfAndDeriv( scores.AddMat(-1.0, R, kTrans); scores.AddMat(-1.0, R, kNoTrans); scores.Add(b); + if (raw_scores != NULL) + raw_scores->CopyFromMat(scores); cu::ComputeXvectorObjfFromScores(scores, &objf_terms, &objf_deriv_terms); CuVector objf_terms_vec(N); diff --git a/src/xvector/xvector.h b/src/xvector/xvector.h index 75083533acd..9ddc2d674fd 100644 --- a/src/xvector/xvector.h +++ b/src/xvector/xvector.h @@ -69,6 +69,7 @@ namespace kaldi { CuMatrixBase *deriv_xvector, CuVector *deriv_S, BaseFloat *deriv_b, + CuMatrixBase *raw_scores, BaseFloat *tot_objf, BaseFloat *tot_weight); } // namespace kaldi From acc4eebcc57dde0e7afe2fcf6af18d8eaf426b6f Mon Sep 17 00:00:00 2001 From: Pegita Date: Thu, 25 Feb 2016 17:14:19 -0500 Subject: [PATCH 2/5] fixed bugs and issues with xvector setup. --- egs/swbd/s5c/local/xvector/train.sh | 19 ++++++--- .../steps/nnet3/xvector/make_jesus_configs.py | 2 +- egs/wsj/s5/steps/nnet3/xvector/train.sh | 11 +++-- src/xvector/nnet-xvector-training.cc | 40 +++++++++++++------ src/xvector/xvector.cc | 3 ++ 5 files changed, 50 insertions(+), 25 deletions(-) diff --git a/egs/swbd/s5c/local/xvector/train.sh b/egs/swbd/s5c/local/xvector/train.sh index f0499ee5741..6dca8b99458 100755 --- a/egs/swbd/s5c/local/xvector/train.sh +++ b/egs/swbd/s5c/local/xvector/train.sh @@ -7,10 +7,13 @@ set -e stage=1 -train_stage=1 +train_stage=-10 generate_alignments=true # false if doing ctc training speed_perturb=true - +init_lr=0.003 +final_lr=0.0003 +max_change=2.0 +use_gpu=true feat_dim=40 # this is the MFCC dim we use in the hires features. you can't change it # unless you change local/xvector/prepare_perturbed_data.sh to use a different # MFCC config with a different dimension. @@ -18,6 +21,7 @@ data=data/train_nodup_sp_hires # you can't change this without changing # local/xvector/prepare_perturbed_data.sh xvector_dim=200 # dimension of the xVector. configurable. xvector_dir=exp/xvector_a +egs_dir=exp/xvector_a/egs . 
./path.sh @@ -40,18 +44,21 @@ if [ $stage -le 3 ]; then $xvector_dir/nnet.config fi -if [ $stage -le 4 ]; then +if [ $stage -le 4 ] && [ -z "$egs_dir" ]; then # dump egs. steps/nnet3/xvector/get_egs.sh --cmd "$train_cmd" \ - "$data" $xvector_dir/egs + "$data" $egs_dir fi if [ $stage -le 5 ]; then # training for 4 epochs * 3 shifts means we see each eg 12 # times (3 different frame-shifts of the same eg are counted as different). steps/nnet3/xvector/train.sh --cmd "$train_cmd" \ - --num-epochs 4 --num-shifts 3 \ - --num-jobs-initial 2 --num-jobs-final 8 \ + --num-epochs 4 --num-shifts 3 --use-gpu $use_gpu --stage $train_stage \ + --initial-effective-lrate $init_lr --final-effective-lrate $final_lr \ + --num-jobs-initial 1 --num-jobs-final 8 \ + --max-param-change $max_change \ + --egs-dir $egs_dir \ $xvector_dir fi diff --git a/egs/wsj/s5/steps/nnet3/xvector/make_jesus_configs.py b/egs/wsj/s5/steps/nnet3/xvector/make_jesus_configs.py index 51d58c5b89c..61eb2d41c24 100755 --- a/egs/wsj/s5/steps/nnet3/xvector/make_jesus_configs.py +++ b/egs/wsj/s5/steps/nnet3/xvector/make_jesus_configs.py @@ -271,7 +271,7 @@ def WriteConfigs(self, f): # just have an affine component for the first hidden layer. # we don't need a nonlinearity as there is one at the input of # the jesus component. - print('component name=x-affine1 type=AffineComponent ' + print('component name=x-affine1 type=NaturalGradientAffineComponent ' 'input-dim={0} output-dim={1} bias-stddev=0'.format( cur_dim, args.jesus_input_dim), file=f) print('component-node name=x-affine1 component=x-affine1 input={0}'.format( diff --git a/egs/wsj/s5/steps/nnet3/xvector/train.sh b/egs/wsj/s5/steps/nnet3/xvector/train.sh index a05c62c5124..f79c2680b1c 100755 --- a/egs/wsj/s5/steps/nnet3/xvector/train.sh +++ b/egs/wsj/s5/steps/nnet3/xvector/train.sh @@ -9,8 +9,8 @@ cmd=run.pl num_epochs=4 # Number of epochs of training; # the number of iterations is worked out from this. num_shifts=3 -initial_effective_lrate=0.0003 -final_effective_lrate=0.00003 +initial_effective_lrate=0.003 +final_effective_lrate=0.0003 num_jobs_initial=2 # Number of neural net jobs to run in parallel at the start of training num_jobs_final=8 # Number of neural net jobs to run in parallel at the end of training stage=-3 @@ -129,7 +129,7 @@ while [ $x -lt $num_iters ]; do if [ $stage -le $x ]; then echo "On iteration $x, learning rate is $this_learning_rate" - + raw="nnet3-copy --learning-rate=$this_learning_rate $dir/$x.raw - |" # Set off jobs doing some diagnostics, in the background. 
# Use the egs dir from the previous iteration for the diagnostics $cmd JOB=1:$num_diagnostic_archives $dir/log/compute_prob_valid.$x.JOB.log \ @@ -142,7 +142,7 @@ while [ $x -lt $num_iters ]; do if [ $x -gt 0 ]; then $cmd $dir/log/progress.$x.log \ nnet3-info $dir/$x.raw '&&' \ - nnet3-show-progress --use-gpu=no $dir/$[$x-1].raw $dir/$x.raw & + nnet3-show-progress --use-gpu=no $dir/$[$x-1].raw $dir/$x.raw & fi echo "Training neural net (pass $x)" @@ -174,8 +174,7 @@ while [ $x -lt $num_iters ]; do $cmd $train_queue_opt $dir/log/train.$x.$n.log \ nnet3-xvector-train $parallel_train_opts --print-interval=10 \ - --max-param-change=$max_param_change \ - $dir/$x.raw \ + --max-param-change=$max_param_change "$raw" \ "ark:nnet3-copy-egs ark:$egs_dir/egs.$archive.ark ark:- | nnet3-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x ark:- ark:-| nnet3-merge-egs --measure-output-frames=false --minibatch-size=$minibatch_size --discard-partial-minibatches=true ark:- ark:- |" \ $dir/$[$x+1].$n.raw || touch $dir/.error & done diff --git a/src/xvector/nnet-xvector-training.cc b/src/xvector/nnet-xvector-training.cc index 7327af90d45..5294879e69f 100644 --- a/src/xvector/nnet-xvector-training.cc +++ b/src/xvector/nnet-xvector-training.cc @@ -30,13 +30,14 @@ NnetXvectorTrainer::NnetXvectorTrainer(const NnetTrainerOptions &config, nnet_(nnet), compiler_(*nnet, config_.optimize_config), num_minibatches_processed_(0) { - if (config.zero_component_stats) + if (config_.zero_component_stats) ZeroComponentStats(nnet); - if (config.momentum == 0.0 && config.max_param_change == 0.0) { + if (config_.momentum == 0.0 && + config_.max_param_change == 0.0) { delta_nnet_= NULL; } else { - KALDI_ASSERT(config.momentum >= 0.0 && - config.max_param_change >= 0.0); + KALDI_ASSERT(config_.momentum >= 0.0 && + config_.max_param_change >= 0.0); delta_nnet_ = nnet_->Copy(); bool is_gradient = false; // setting this to true would disable the // natural-gradient updates. @@ -94,7 +95,8 @@ void NnetXvectorTrainer::Train(const NnetExample &eg) { ScaleNnet(config_.momentum, delta_nnet_); } if (config_.write_cache != "") { - Output ko(config_.write_cache, config_.binary_write_cache); + Output ko(config_.write_cache, + config_.binary_write_cache); compiler_.WriteCache(ko.Stream(), config_.binary_write_cache); } } @@ -143,7 +145,8 @@ void NnetXvectorTrainer::ProcessOutputs(NnetComputer *computer) { computer->AcceptOutputDeriv(b_name, &deriv_b_mat); } - objf_info_[xvector_name].UpdateStats(xvector_name, config_.print_interval, + objf_info_[xvector_name].UpdateStats(xvector_name, + config_.print_interval, num_minibatches_processed_++, tot_weight, tot_objf); } @@ -246,7 +249,7 @@ void GetComputationRequestXvector(const Nnet &nnet, request->need_model_derivative = need_model_derivative; request->store_component_stats = store_component_stats; - // xvector-egs have multiple inputs(e.g. different inputs correspond + // xvector-egs has multiple inputs(e.g. different inputs correspond // to different chunks and no outputs. for (size_t i = 0; i < eg.io.size(); i++) { const NnetIo &io = eg.io[i]; @@ -263,21 +266,34 @@ void GetComputationRequestXvector(const Nnet &nnet, IoSpecification &io_spec = dest.back(); io_spec.name = name; io_spec.indexes = io.indexes; - io_spec.has_deriv = nnet.IsOutputNode(node_index) && need_model_derivative; + io_spec.has_deriv = false; } // We only need the output on frame t=0 for each n. 
+ // So the output index for output node is (n, 0, 0) + // for n = 0,.., min number of n-values for different t + // in input indexes. + // indexes for "s" and "b" output nodes are equal to (0,0,0). int32 io_index_size = request->inputs[0].indexes.size(), - n_indx_size = 0; + n_indx_size = 1e6, t_ind; std::vector output_indexes, affine_output_indexes; affine_output_indexes.resize(1); affine_output_indexes[0].n = 0; affine_output_indexes[0].t = 0; + + std::map n_indx_sizes; + for (int32 indx = 0; indx < io_index_size; indx++) { + t_ind = request->inputs[0].indexes[indx].t; + if (n_indx_sizes.count(t_ind) != 0) + n_indx_sizes[t_ind] += 1; + else + n_indx_sizes.insert(std::make_pair(t_ind, 1)); + } + std::map::const_iterator iter; + for (iter = n_indx_sizes.begin(); iter != n_indx_sizes.end(); iter++) + n_indx_size = std::min(n_indx_size, iter->second); - for (int32 indx = 0; indx < io_index_size; indx++) - if (request->inputs[0].indexes[indx].t == 0) - n_indx_size++; output_indexes.resize(n_indx_size); for (int32 indx = 0; indx < n_indx_size; indx++) { diff --git a/src/xvector/xvector.cc b/src/xvector/xvector.cc index 604d70e9c14..aab825ba60b 100644 --- a/src/xvector/xvector.cc +++ b/src/xvector/xvector.cc @@ -40,6 +40,9 @@ void ComputeXvectorObjfAndDeriv( KALDI_ASSERT(deriv_xvector->NumCols() == xvector_dim); KALDI_ASSERT(deriv_xvector->NumRows() == N); KALDI_ASSERT(deriv_S->Dim() == S_dim); + deriv_xvector->Set(0.0); + deriv_S->Set(0.0); + (*deriv_b) = 0.0; } CuMatrix S_tmp(S), From 862f964b91880cffbdcb5cb9710689e23a0ae8dd Mon Sep 17 00:00:00 2001 From: David Snyder Date: Thu, 25 Feb 2016 21:22:03 -0500 Subject: [PATCH 3/5] xvector: resolving merge issues --- .../local/xvector/prepare_perturbed_data.sh | 9 ++++---- egs/swbd/s5c/local/xvector/train.sh | 21 +++++++++++++------ egs/wsj/s5/steps/nnet3/xvector/train.sh | 2 +- src/xvector/nnet-xvector-diagnostics.cc | 2 +- 4 files changed, 22 insertions(+), 12 deletions(-) diff --git a/egs/swbd/s5c/local/xvector/prepare_perturbed_data.sh b/egs/swbd/s5c/local/xvector/prepare_perturbed_data.sh index 7ce4d553733..ca11fe2f283 100755 --- a/egs/swbd/s5c/local/xvector/prepare_perturbed_data.sh +++ b/egs/swbd/s5c/local/xvector/prepare_perturbed_data.sh @@ -10,6 +10,8 @@ stage=1 train_stage=-10 generate_alignments=true # false if doing ctc training speed_perturb=true +mfcc_config=conf/mfcc_hires.conf +mfccdir=mfcc . ./path.sh . ./utils/parse_options.sh @@ -27,13 +29,12 @@ if [ $stage -le 1 ]; then if [ -f data/${datadir}_sp_hires/feats.scp ]; then echo "$0: directory data/${datadir}_sp_hires/feats.scp already exists, skipping creating it." else - mfccdir=mfcc utils/copy_data_dir.sh data/${datadir}_sp data/${datadir}_sp_hires - steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 \ + steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 --mfcc-config $mfcc_config \ data/${datadir}_sp_hires exp/make_mfcc/${datadir}_sp_hires $mfccdir || exit 1; # we typically won't need the cmvn stats when using hires features-- it's # mostly for neural nets. 
- utils/fix_data_dir.sh data/${dataset}_sp_hires # remove segments with problems + utils/fix_data_dir.sh data/${datadir}_sp_hires # remove segments with problems fi done fi @@ -50,7 +51,7 @@ if [ $stage -le 2 ]; then echo "$0: data/${dataset}_hires/feats.scp already exists, skipping mfcc generation" else utils/copy_data_dir.sh data/$dataset data/${dataset}_hires - steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 --mfcc-config conf/mfcc_hires.conf \ + steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 --mfcc-config $mfcc_config \ data/${dataset}_hires exp/make_hires/$dataset $mfccdir; steps/compute_cmvn_stats.sh data/${dataset}_hires exp/make_hires/$dataset $mfccdir; utils/fix_data_dir.sh data/${dataset}_hires # remove segments with problems diff --git a/egs/swbd/s5c/local/xvector/train.sh b/egs/swbd/s5c/local/xvector/train.sh index f0499ee5741..64622f5dc2d 100755 --- a/egs/swbd/s5c/local/xvector/train.sh +++ b/egs/swbd/s5c/local/xvector/train.sh @@ -7,10 +7,13 @@ set -e stage=1 -train_stage=1 +train_stage=-10 generate_alignments=true # false if doing ctc training speed_perturb=true - +init_lr=0.003 +final_lr=0.0003 +max_change=2.0 +use_gpu=true feat_dim=40 # this is the MFCC dim we use in the hires features. you can't change it # unless you change local/xvector/prepare_perturbed_data.sh to use a different # MFCC config with a different dimension. @@ -18,6 +21,7 @@ data=data/train_nodup_sp_hires # you can't change this without changing # local/xvector/prepare_perturbed_data.sh xvector_dim=200 # dimension of the xVector. configurable. xvector_dir=exp/xvector_a +egs_dir=exp/xvector_a/egs . ./path.sh @@ -33,6 +37,8 @@ if [ $stage -le 3 ]; then $train_cmd $xvector_dir/log/make_configs.log \ steps/nnet3/xvector/make_jesus_configs.py \ + --jesus-stddev-scale=1.0 \ + --s-scale=0.05 --b-scale=0.05 --output-scale=0.05 \ --splice-indexes="-1,0,1 -2,-1,0,1 -3,0,3 mean+stddev+count(-99:3:9:0) 0" \ --feat-dim $feat_dim --output-dim $xvector_dim \ --num-jesus-blocks 100 \ @@ -40,18 +46,21 @@ if [ $stage -le 3 ]; then $xvector_dir/nnet.config fi -if [ $stage -le 4 ]; then +if [ $stage -le 4 ] && [ -z "$egs_dir" ]; then # dump egs. steps/nnet3/xvector/get_egs.sh --cmd "$train_cmd" \ - "$data" $xvector_dir/egs + "$data" $egs_dir fi if [ $stage -le 5 ]; then # training for 4 epochs * 3 shifts means we see each eg 12 # times (3 different frame-shifts of the same eg are counted as different). 
steps/nnet3/xvector/train.sh --cmd "$train_cmd" \ - --num-epochs 4 --num-shifts 3 \ - --num-jobs-initial 2 --num-jobs-final 8 \ + --num-epochs 4 --num-shifts 3 --use-gpu $use_gpu --stage $train_stage \ + --initial-effective-lrate $init_lr --final-effective-lrate $final_lr \ + --num-jobs-initial 1 --num-jobs-final 8 \ + --max-param-change $max_change \ + --egs-dir $egs_dir \ $xvector_dir fi diff --git a/egs/wsj/s5/steps/nnet3/xvector/train.sh b/egs/wsj/s5/steps/nnet3/xvector/train.sh index 44ca0e18015..1377728219c 100755 --- a/egs/wsj/s5/steps/nnet3/xvector/train.sh +++ b/egs/wsj/s5/steps/nnet3/xvector/train.sh @@ -175,7 +175,7 @@ while [ $x -lt $num_iters ]; do $cmd $train_queue_opt $dir/log/train.$x.$n.log \ nnet3-xvector-train $parallel_train_opts --print-interval=10 \ - --max-param-change=$max_param_change --diss-scale=$diss_scale "$raw" \ + --max-param-change=$max_param_change "$raw" \ "ark:nnet3-copy-egs ark:$egs_dir/egs.$archive.ark ark:- | nnet3-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x ark:- ark:-| nnet3-merge-egs --measure-output-frames=false --minibatch-size=$minibatch_size --discard-partial-minibatches=true ark:- ark:- |" \ $dir/$[$x+1].$n.raw || touch $dir/.error & done diff --git a/src/xvector/nnet-xvector-diagnostics.cc b/src/xvector/nnet-xvector-diagnostics.cc index 119c261b7d4..dce3072f85d 100644 --- a/src/xvector/nnet-xvector-diagnostics.cc +++ b/src/xvector/nnet-xvector-diagnostics.cc @@ -166,7 +166,7 @@ bool NnetXvectorComputeProb::PrintTotalStats() const { << (obj_type == kLinear ? "log-likelihood" : "objective") << " for '" << name << "' is " << (info.tot_objective / info.tot_weight) << " per chunk" - << ", over " << info.tot_weight << " chunk."; + << ", over " << info.tot_weight << " chunks."; if (info.tot_weight > 0) ans = true; } From 20bb518b3073edfef8f680a9ae63fc894891129b Mon Sep 17 00:00:00 2001 From: David Snyder Date: Thu, 25 Feb 2016 21:41:14 -0500 Subject: [PATCH 4/5] xvector: fixing merge problems --- egs/swbd/s5c/local/xvector/train.sh | 2 -- egs/wsj/s5/steps/nnet3/xvector/train.sh | 1 - src/xvector/nnet-xvector-training.cc | 26 ++++++++++++++++--------- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/egs/swbd/s5c/local/xvector/train.sh b/egs/swbd/s5c/local/xvector/train.sh index 64622f5dc2d..6dca8b99458 100755 --- a/egs/swbd/s5c/local/xvector/train.sh +++ b/egs/swbd/s5c/local/xvector/train.sh @@ -37,8 +37,6 @@ if [ $stage -le 3 ]; then $train_cmd $xvector_dir/log/make_configs.log \ steps/nnet3/xvector/make_jesus_configs.py \ - --jesus-stddev-scale=1.0 \ - --s-scale=0.05 --b-scale=0.05 --output-scale=0.05 \ --splice-indexes="-1,0,1 -2,-1,0,1 -3,0,3 mean+stddev+count(-99:3:9:0) 0" \ --feat-dim $feat_dim --output-dim $xvector_dim \ --num-jesus-blocks 100 \ diff --git a/egs/wsj/s5/steps/nnet3/xvector/train.sh b/egs/wsj/s5/steps/nnet3/xvector/train.sh index 1377728219c..bbdeefc6562 100755 --- a/egs/wsj/s5/steps/nnet3/xvector/train.sh +++ b/egs/wsj/s5/steps/nnet3/xvector/train.sh @@ -8,7 +8,6 @@ cmd=run.pl num_epochs=4 # Number of epochs of training; # the number of iterations is worked out from this. -diss_scale=1.0 # scale value used to scale the dissimalarity part in objective function. 
num_shifts=3 initial_effective_lrate=0.003 final_effective_lrate=0.0003 diff --git a/src/xvector/nnet-xvector-training.cc b/src/xvector/nnet-xvector-training.cc index dc512937882..f7fb430ad2c 100644 --- a/src/xvector/nnet-xvector-training.cc +++ b/src/xvector/nnet-xvector-training.cc @@ -30,13 +30,13 @@ NnetXvectorTrainer::NnetXvectorTrainer(const NnetTrainerOptions &config, nnet_(nnet), compiler_(*nnet, config_.optimize_config), num_minibatches_processed_(0) { - if (config.zero_component_stats) + if (config_.zero_component_stats) ZeroComponentStats(nnet); if (config.momentum == 0.0 && config.max_param_change == 0.0) { delta_nnet_= NULL; } else { - KALDI_ASSERT(config.momentum >= 0.0 && - config.max_param_change >= 0.0); + KALDI_ASSERT(config_.momentum >= 0.0 && + config_.max_param_change >= 0.0); delta_nnet_ = nnet_->Copy(); bool is_gradient = false; // setting this to true would disable the // natural-gradient updates. @@ -247,7 +247,7 @@ void GetComputationRequestXvector(const Nnet &nnet, request->need_model_derivative = need_model_derivative; request->store_component_stats = store_component_stats; - // xvector-egs have multiple inputs(e.g. different inputs correspond + // xvector-egs has multiple inputs(e.g. different inputs correspond // to different chunks and no outputs. for (size_t i = 0; i < eg.io.size(); i++) { const NnetIo &io = eg.io[i]; @@ -264,21 +264,29 @@ void GetComputationRequestXvector(const Nnet &nnet, IoSpecification &io_spec = dest.back(); io_spec.name = name; io_spec.indexes = io.indexes; - io_spec.has_deriv = nnet.IsOutputNode(node_index) && need_model_derivative; + io_spec.has_deriv = false; } // We only need the output on frame t=0 for each n. int32 io_index_size = request->inputs[0].indexes.size(), - n_indx_size = 0; + n_indx_size = 1e6, t_ind; std::vector output_indexes, affine_output_indexes; affine_output_indexes.resize(1); affine_output_indexes[0].n = 0; affine_output_indexes[0].t = 0; - for (int32 indx = 0; indx < io_index_size; indx++) - if (request->inputs[0].indexes[indx].t == 0) - n_indx_size++; + std::map n_indx_sizes; + for (int32 indx = 0; indx < io_index_size; indx++) { + t_ind = request->inputs[0].indexes[indx].t; + if (n_indx_sizes.count(t_ind) != 0) + n_indx_sizes[t_ind] += 1; + else + n_indx_sizes.insert(std::make_pair(t_ind, 1)); + } + std::map::const_iterator iter; + for (iter = n_indx_sizes.begin(); iter != n_indx_sizes.end(); iter++) + n_indx_size = std::min(n_indx_size, iter->second); output_indexes.resize(n_indx_size); for (int32 indx = 0; indx < n_indx_size; indx++) { From cd58d06fb50ae5782256f1ea3c0f43525b0c44ab Mon Sep 17 00:00:00 2001 From: David Snyder Date: Thu, 25 Feb 2016 21:47:00 -0500 Subject: [PATCH 5/5] xvector: resolving merge issues --- src/xvector/nnet-xvector-diagnostics.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/xvector/nnet-xvector-diagnostics.cc b/src/xvector/nnet-xvector-diagnostics.cc index dce3072f85d..6648983b18e 100644 --- a/src/xvector/nnet-xvector-diagnostics.cc +++ b/src/xvector/nnet-xvector-diagnostics.cc @@ -101,10 +101,9 @@ void NnetXvectorComputeProb::ProcessOutputs(NnetComputer *computer) { dim_xvector = xvector_pairs.NumCols(); int32 s_dim = dim_xvector * (dim_xvector + 1) / 2; - CuMatrix xvector_deriv(num_rows, - dim_xvector, + CuMatrix xvector_deriv(num_rows, dim_xvector, kUndefined), - raw_scores(num_rows, num_rows); + raw_scores(num_rows, num_rows, kUndefined); // convert CuVector to CuSpMatrix CuSpMatrix xvec_s_sp(dim_xvector);
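
A note on the EER-based accuracy added in patch 1/5: ComputeEer sorts the target and nontarget score lists, then scans up the sorted targets; position i corresponds to a miss rate of i / num_targets, and the matching position in the nontarget list gives the false-alarm rate. The scan stops where the false-alarm rate drops below the miss rate, and the target score at that point becomes eer_threshold_ (need_eer_threshold_ is then cleared, so later diagnostic minibatches reuse the threshold rather than re-estimating it). Below is a minimal standalone sketch of the same search; it uses std::vector in place of Kaldi types, returns the threshold through an out-parameter instead of a member, and the toy scores in main() are made up for illustration.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <vector>

// Mirrors the search in NnetXvectorComputeProb::ComputeEer: sort both
// score lists, then walk up the targets until the false-alarm rate of
// the matching nontarget position falls below the miss rate.
float ComputeEer(std::vector<float> *target_scores,
                 std::vector<float> *nontarget_scores,
                 float *threshold) {
  assert(!target_scores->empty() && !nontarget_scores->empty());
  std::sort(target_scores->begin(), target_scores->end());
  std::sort(nontarget_scores->begin(), nontarget_scores->end());
  size_t target_position = 0,
         target_size = target_scores->size(),
         nontarget_size = nontarget_scores->size();
  for (; target_position + 1 < target_size; target_position++) {
    // Miss rate at this position is target_position / target_size;
    // nontarget_position marks the same rate from the top of the
    // nontarget list (it cannot go below zero, since
    // nontarget_n < nontarget_size whenever target_position < target_size).
    size_t nontarget_n = nontarget_size * target_position / target_size,
           nontarget_position = nontarget_size - 1 - nontarget_n;
    if ((*nontarget_scores)[nontarget_position] <
        (*target_scores)[target_position])
      break;
  }
  *threshold = (*target_scores)[target_position];
  return static_cast<float>(target_position) / target_size;
}

int main() {
  // Toy scores (made up): targets mostly score higher than nontargets.
  std::vector<float> targets = {2.1f, 1.7f, 0.4f, 2.8f, 1.2f};
  std::vector<float> nontargets = {-1.5f, 0.9f, -0.2f, -2.0f, 0.1f, -0.7f};
  float threshold;
  float eer = ComputeEer(&targets, &nontargets, &threshold);
  std::printf("EER = %.2f at threshold %.2f\n", eer, threshold);
  return 0;
}

As in the patch, the estimate is the miss rate at the crossing point, so its resolution is limited to 1 / num_targets; that is adequate for a diagnostic printed once per iteration.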
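
The accuracy pass over raw_scores encodes an assumption about how the diagnostic egs are ordered: the condition (i + 1 == j && i % 2 == 0) treats rows (0,1), (2,3), ... as same-speaker (target) pairs and every other pair with i < j as an impostor trial, and a pair counts as an error when its score lands on the wrong side of eer_threshold_. The following is a hedged sketch of that decision rule only, with a plain upper-triangular matrix standing in for the CuMatrixBase the patch uses; the helper name and the 4x4 example are illustrative, not from the patch.

#include <cassert>
#include <cstdio>
#include <vector>

// Applies the patch's decision rule to an N x N score matrix: accept a
// pair when its score is >= threshold, and count an error whenever the
// decision disagrees with the assumed labelling (rows 2k and 2k+1 come
// from the same speaker; all other pairs are impostor trials).
float ComputeAccuracy(const std::vector<std::vector<float> > &scores,
                      float threshold) {
  int num_rows = static_cast<int>(scores.size());
  assert(num_rows >= 2);
  int count = 0, errors = 0;
  for (int i = 0; i < num_rows; i++) {
    for (int j = i + 1; j < num_rows; j++) {  // upper triangle only
      bool is_target = (j == i + 1 && i % 2 == 0);
      bool accepted = (scores[i][j] >= threshold);
      if (accepted != is_target)
        errors++;
      count++;
    }
  }
  return 1.0f - static_cast<float>(errors) / count;
}

int main() {
  // Four chunks: rows (0,1) share a speaker, rows (2,3) share another;
  // only the upper triangle is read.
  std::vector<std::vector<float> > scores = {
      {0.0f, 1.9f, -0.8f, -1.1f},
      {0.0f, 0.0f, -0.5f, -1.4f},
      {0.0f, 0.0f,  0.0f,  2.3f},
      {0.0f, 0.0f,  0.0f,  0.0f}};
  std::printf("accuracy = %.2f\n", ComputeAccuracy(scores, 0.0f));
  return 0;
}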
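
On the GetComputationRequestXvector change made in patch 2/5 and repeated in patch 4/5: the old code sized the output by counting only the input indexes with t == 0, which undercounts when some chunks do not contain frame t = 0. The new code tallies how many input indexes share each t value and takes the minimum over t as the number of output rows. The sketch below reproduces just that counting; the flat (n, t) index list, the simplified Index struct, and the helper name NumOutputRows are assumptions made here for illustration, not names from the patch.

#include <algorithm>
#include <cstdio>
#include <limits>
#include <map>
#include <vector>

struct Index { int n, t; };  // simplified stand-in for nnet3's Index

// Counts how many input indexes share each t value and returns the
// smallest such count over all t: the minimum number of n-values
// present at any frame offset, which the patch uses to size the output.
int NumOutputRows(const std::vector<Index> &input_indexes) {
  std::map<int, int> counts_per_t;
  for (size_t i = 0; i < input_indexes.size(); i++)
    counts_per_t[input_indexes[i].t]++;
  int min_count = std::numeric_limits<int>::max();
  std::map<int, int>::const_iterator iter;
  for (iter = counts_per_t.begin(); iter != counts_per_t.end(); ++iter)
    min_count = std::min(min_count, iter->second);
  return min_count;  // caller must ensure input_indexes is non-empty
}

int main() {
  // Two chunks (n = 0, 1) over frames t = -1..1; n = 1 is missing t = 1,
  // so only one output row is counted.
  Index idx[] = { {0, -1}, {0, 0}, {0, 1}, {1, -1}, {1, 0} };
  std::vector<Index> indexes(idx, idx + 5);
  std::printf("num output rows = %d\n", NumOutputRows(indexes));
  return 0;
}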