diff --git a/egs/swbd/s5c/local/xvector/prepare_perturbed_data.sh b/egs/swbd/s5c/local/xvector/prepare_perturbed_data.sh
index 7ce4d553733..ca11fe2f283 100755
--- a/egs/swbd/s5c/local/xvector/prepare_perturbed_data.sh
+++ b/egs/swbd/s5c/local/xvector/prepare_perturbed_data.sh
@@ -10,6 +10,8 @@ stage=1
 train_stage=-10
 generate_alignments=true # false if doing ctc training
 speed_perturb=true
+mfcc_config=conf/mfcc_hires.conf
+mfccdir=mfcc
 
 . ./path.sh
 . ./utils/parse_options.sh
@@ -27,13 +29,12 @@ if [ $stage -le 1 ]; then
     if [ -f data/${datadir}_sp_hires/feats.scp ]; then
       echo "$0: directory data/${datadir}_sp_hires/feats.scp already exists, skipping creating it."
     else
-      mfccdir=mfcc
       utils/copy_data_dir.sh data/${datadir}_sp data/${datadir}_sp_hires
-      steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 \
+      steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 --mfcc-config $mfcc_config \
        data/${datadir}_sp_hires exp/make_mfcc/${datadir}_sp_hires $mfccdir || exit 1;
       # we typically won't need the cmvn stats when using hires features-- it's
       # mostly for neural nets.
-      utils/fix_data_dir.sh data/${dataset}_sp_hires # remove segments with problems
+      utils/fix_data_dir.sh data/${datadir}_sp_hires # remove segments with problems
     fi
   done
 fi
@@ -50,7 +51,7 @@ if [ $stage -le 2 ]; then
     echo "$0: data/${dataset}_hires/feats.scp already exists, skipping mfcc generation"
   else
     utils/copy_data_dir.sh data/$dataset data/${dataset}_hires
-    steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 --mfcc-config conf/mfcc_hires.conf \
+    steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 --mfcc-config $mfcc_config \
      data/${dataset}_hires exp/make_hires/$dataset $mfccdir;
     steps/compute_cmvn_stats.sh data/${dataset}_hires exp/make_hires/$dataset $mfccdir;
     utils/fix_data_dir.sh data/${dataset}_hires # remove segments with problems
diff --git a/egs/swbd/s5c/local/xvector/train.sh b/egs/swbd/s5c/local/xvector/train.sh
index f0499ee5741..6dca8b99458 100755
--- a/egs/swbd/s5c/local/xvector/train.sh
+++ b/egs/swbd/s5c/local/xvector/train.sh
@@ -7,10 +7,13 @@ set -e
 
 stage=1
-train_stage=1
+train_stage=-10
 generate_alignments=true # false if doing ctc training
 speed_perturb=true
-
+init_lr=0.003
+final_lr=0.0003
+max_change=2.0
+use_gpu=true
 feat_dim=40 # this is the MFCC dim we use in the hires features. you can't change it
             # unless you change local/xvector/prepare_perturbed_data.sh to use a different
             # MFCC config with a different dimension.
@@ -18,6 +21,7 @@ data=data/train_nodup_sp_hires # you can't change this without changing
                                # local/xvector/prepare_perturbed_data.sh
 xvector_dim=200 # dimension of the xVector. configurable.
 xvector_dir=exp/xvector_a
+egs_dir=exp/xvector_a/egs
 
 . ./path.sh
@@ -40,18 +44,21 @@ if [ $stage -le 3 ]; then
     $xvector_dir/nnet.config
 fi
 
-if [ $stage -le 4 ]; then
+# skip dumping egs if they are already present in $egs_dir.
+if [ $stage -le 4 ] && [ ! -f $egs_dir/egs.1.ark ]; then
   # dump egs.
   steps/nnet3/xvector/get_egs.sh --cmd "$train_cmd" \
-    "$data" $xvector_dir/egs
+    "$data" $egs_dir
 fi
 
 if [ $stage -le 5 ]; then
   # training for 4 epochs * 3 shifts means we see each eg 12
   # times (3 different frame-shifts of the same eg are counted as different).
   steps/nnet3/xvector/train.sh --cmd "$train_cmd" \
-    --num-epochs 4 --num-shifts 3 \
-    --num-jobs-initial 2 --num-jobs-final 8 \
+    --num-epochs 4 --num-shifts 3 --use-gpu $use_gpu --stage $train_stage \
+    --initial-effective-lrate $init_lr --final-effective-lrate $final_lr \
+    --num-jobs-initial 1 --num-jobs-final 8 \
+    --max-param-change $max_change \
+    --egs-dir $egs_dir \
     $xvector_dir
 fi
diff --git a/egs/wsj/s5/steps/nnet3/xvector/train.sh b/egs/wsj/s5/steps/nnet3/xvector/train.sh
index e6ee22cfb16..bbdeefc6562 100755
--- a/egs/wsj/s5/steps/nnet3/xvector/train.sh
+++ b/egs/wsj/s5/steps/nnet3/xvector/train.sh
@@ -8,7 +8,6 @@ cmd=run.pl
 num_epochs=4   # Number of epochs of training;
                # the number of iterations is worked out from this.
-diss_scale=1.0 # scale value used to scale the dissimalarity part in objective function.
 num_shifts=3
 initial_effective_lrate=0.003
 final_effective_lrate=0.0003
@@ -134,10 +133,10 @@ while [ $x -lt $num_iters ]; do
     # Set off jobs doing some diagnostics, in the background.
     # Use the egs dir from the previous iteration for the diagnostics
     $cmd JOB=1:$num_diagnostic_archives $dir/log/compute_prob_valid.$x.JOB.log \
-      nnet3-xvector-compute-prob $dir/$x.raw \
+      nnet3-xvector-compute-prob --compute-accuracy=true $dir/$x.raw \
       "ark:nnet3-merge-egs --measure-output-frames=false ark:$egs_dir/valid_diagnostic_egs.JOB.ark ark:- |" &
     $cmd JOB=1:$num_diagnostic_archives $dir/log/compute_prob_train.$x.JOB.log \
-      nnet3-xvector-compute-prob $dir/$x.raw \
+      nnet3-xvector-compute-prob --compute-accuracy=true $dir/$x.raw \
       "ark:nnet3-merge-egs --measure-output-frames=false ark:$egs_dir/train_diagnostic_egs.JOB.ark ark:- |" &
 
     if [ $x -gt 0 ]; then
@@ -175,7 +174,7 @@ while [ $x -lt $num_iters ]; do
 
       $cmd $train_queue_opt $dir/log/train.$x.$n.log \
         nnet3-xvector-train $parallel_train_opts --print-interval=10 \
-        --max-param-change=$max_param_change --diss-scale=$diss_scale "$raw" \
+        --max-param-change=$max_param_change "$raw" \
         "ark:nnet3-copy-egs ark:$egs_dir/egs.$archive.ark ark:- | nnet3-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x ark:- ark:-| nnet3-merge-egs --measure-output-frames=false --minibatch-size=$minibatch_size --discard-partial-minibatches=true ark:- ark:- |" \
         $dir/$[$x+1].$n.raw || touch $dir/.error &
     done
diff --git a/src/xvector/nnet-xvector-diagnostics.cc b/src/xvector/nnet-xvector-diagnostics.cc
index 74f2ce2e6aa..6648983b18e 100644
--- a/src/xvector/nnet-xvector-diagnostics.cc
+++ b/src/xvector/nnet-xvector-diagnostics.cc
@@ -36,6 +36,10 @@ NnetXvectorComputeProb::NnetXvectorComputeProb(const NnetComputeProbOptions &config,
     bool is_gradient = true;  // force simple update
     SetZero(is_gradient, deriv_nnet_);
   }
+  if (config_.compute_accuracy)
+    need_eer_threshold_ = true;
+  else
+    need_eer_threshold_ = false;
 }
 
 const Nnet &NnetXvectorComputeProb::GetDeriv() const {
@@ -51,6 +55,7 @@ NnetXvectorComputeProb::~NnetXvectorComputeProb() {
 void NnetXvectorComputeProb::Reset() {
   num_minibatches_processed_ = 0;
   objf_info_.clear();
+  acc_info_.clear();
   if (deriv_nnet_) {
     bool is_gradient = true;
     SetZero(is_gradient, deriv_nnet_);
@@ -80,46 +85,65 @@ void NnetXvectorComputeProb::ProcessOutputs(NnetComputer *computer) {
     if (nnet_.IsOutputNode(node_index)) {
       std::string xvector_name = nnet_.GetNodeName(node_index), s_name = "s",
           b_name = "b";
-      if (nnet_.GetNodeIndex(s_name) == -1 || nnet_.GetNodeIndex(b_name) == -1)
-        KALDI_ERR << "The nnet expected to have two output nodes with name s and b.";
+      if (nnet_.GetNodeIndex(s_name) == -1
+          || nnet_.GetNodeIndex(b_name) == -1)
+        KALDI_ERR << "Expected the nnet to have two output nodes "
+                  << "named 's' and 'b'.";
       if (xvector_name != s_name && xvector_name != b_name) {
-        const CuMatrixBase<BaseFloat> &xvector_pairs = computer->GetOutput(xvector_name),
-            &xvec_s = computer->GetOutput(s_name),
-            &xvec_b = computer->GetOutput(b_name);
-        CuMatrix<BaseFloat> xvector_deriv(xvector_pairs.NumRows(), xvector_pairs.NumCols(),
-            kUndefined);
-        int32 s_dim = xvector_pairs.NumCols() * (xvector_pairs.NumCols() + 1) / 2;
+        const CuMatrixBase<BaseFloat>
+            &xvector_pairs = computer->GetOutput(xvector_name),
+            &xvec_s = computer->GetOutput(s_name),
+            &xvec_b = computer->GetOutput(b_name);
+        int32 num_rows = xvector_pairs.NumRows(),
+            dim_xvector = xvector_pairs.NumCols();
+        int32 s_dim = dim_xvector * (dim_xvector + 1) / 2;
+
+        CuMatrix<BaseFloat> xvector_deriv(num_rows, dim_xvector, kUndefined),
+            raw_scores(num_rows, num_rows, kUndefined);
 
         // convert CuVector to CuSpMatrix
-        CuSpMatrix<BaseFloat> xvec_s_sp(xvector_pairs.NumCols());
+        CuSpMatrix<BaseFloat> xvec_s_sp(dim_xvector);
         xvec_s_sp.CopyFromVec(xvec_s.Row(0));
         CuVector<BaseFloat> deriv_s(s_dim);
         BaseFloat xvec_b_val = xvec_b(0,0), deriv_b;
         BaseFloat tot_weight, tot_objf;
         bool supply_deriv = config_.compute_deriv;
+        bool compute_accuracy = config_.compute_accuracy;
         ComputeXvectorObjfAndDeriv(xvector_pairs, xvec_s_sp, xvec_b_val,
                                    (supply_deriv ? &xvector_deriv : NULL),
                                    (supply_deriv ? &deriv_s : NULL),
                                    (supply_deriv ? &deriv_b : NULL),
+                                   (compute_accuracy ? &raw_scores : NULL),
                                    &tot_objf, &tot_weight);
 
         if (supply_deriv) {
           CuMatrix<BaseFloat> deriv_s_mat(1, s_dim),
-              deriv_b_mat(1,1);
+            deriv_b_mat(1,1);
           deriv_b_mat(0,0) = deriv_b;
           deriv_s_mat.CopyRowsFromVec(deriv_s);
           computer->AcceptOutputDeriv(xvector_name, &xvector_deriv);
           computer->AcceptOutputDeriv(s_name, &deriv_s_mat);
           computer->AcceptOutputDeriv(b_name, &deriv_b_mat);
         }
+
         SimpleObjectiveInfo &totals = objf_info_[xvector_name];
         totals.tot_weight += tot_weight;
         totals.tot_objective += tot_objf;
+
+        if (compute_accuracy) {
+          BaseFloat tot_acc, tot_weight_acc;
+          SimpleObjectiveInfo &acc_totals = acc_info_[xvector_name];
+          ComputeAccuracy(raw_scores, &tot_weight_acc, &tot_acc);
+          acc_totals.tot_objective += tot_weight_acc * tot_acc;
+          acc_totals.tot_weight += tot_weight_acc;
+        }
+        num_minibatches_processed_++;
       }
-      num_minibatches_processed_++;
     }
   }
 }
@@ -140,15 +164,70 @@ bool NnetXvectorComputeProb::PrintTotalStats() const {
       KALDI_LOG << "Overall "
                 << (obj_type == kLinear ? "log-likelihood" : "objective")
                 << " for '" << name << "' is "
-                << (info.tot_objective / info.tot_weight) << " per frame"
-                << ", over " << info.tot_weight << " frames.";
+                << (info.tot_objective / info.tot_weight) << " per chunk"
+                << ", over " << info.tot_weight << " chunks.";
       if (info.tot_weight > 0)
         ans = true;
     }
   }
+  if (config_.compute_accuracy) {  // Now print the accuracy.
+    iter = acc_info_.begin();
+    end = acc_info_.end();
+    for (; iter != end; ++iter) {
+      const std::string &name = iter->first;
+      const SimpleObjectiveInfo &info = iter->second;
+      KALDI_LOG << "Overall accuracy for '" << name << "' is "
+                << (info.tot_objective / info.tot_weight)
+                << " per pair of chunks"
+                << ", over " << info.tot_weight << " pairs of chunks.";
+    }
+  }
   return ans;
 }
 
+void NnetXvectorComputeProb::ComputeAccuracy(
+    const CuMatrixBase<BaseFloat> &raw_scores,
+    BaseFloat *tot_weight_out,
+    BaseFloat *tot_accuracy_out) {
+  int32 num_rows = raw_scores.NumRows();
+  // The accuracy uses the EER threshold, which is computed on the
+  // first minibatch and reused for subsequent minibatches.
+  if (need_eer_threshold_) {
+    std::vector<BaseFloat> target_scores;
+    std::vector<BaseFloat> nontarget_scores;
+    for (int32 i = 0; i < num_rows; i++) {
+      for (int32 j = 0; j < num_rows; j++) {
+        if (i + 1 == j && i % 2 == 0) {
+          target_scores.push_back(raw_scores(i, j));
+        } else if (i < j) {
+          nontarget_scores.push_back(raw_scores(i, j));
+        }
+      }
+    }
+    (*tot_accuracy_out) = 1.0 - ComputeEer(&target_scores, &nontarget_scores);
+    (*tot_weight_out) = target_scores.size() + nontarget_scores.size();
+    need_eer_threshold_ = false;
+  } else {
+    int32 count = 0,
+        error = 0;
+    for (int32 i = 0; i < num_rows; i++) {
+      for (int32 j = 0; j < num_rows; j++) {
+        if (i + 1 == j && i % 2 == 0) {
+          if (raw_scores(i, j) < eer_threshold_)
+            error++;
+          count++;
+        } else if (i < j) {
+          if (raw_scores(i, j) >= eer_threshold_)
+            error++;
+          count++;
+        }
+      }
+    }
+    (*tot_accuracy_out) = 1.0 - static_cast<BaseFloat>(error) / count;
+    (*tot_weight_out) = count;
+  }
+}
+
 const SimpleObjectiveInfo* NnetXvectorComputeProb::GetObjective(
     const std::string &output_name) const {
   unordered_map<std::string, SimpleObjectiveInfo, StringHasher>::const_iterator
@@ -159,5 +238,28 @@ const SimpleObjectiveInfo* NnetXvectorComputeProb::GetObjective(
   return NULL;
 }
 
+BaseFloat NnetXvectorComputeProb::ComputeEer(
+    std::vector<BaseFloat> *target_scores,
+    std::vector<BaseFloat> *nontarget_scores) {
+  KALDI_ASSERT(!target_scores->empty() && !nontarget_scores->empty());
+  std::sort(target_scores->begin(), target_scores->end());
+  std::sort(nontarget_scores->begin(), nontarget_scores->end());
+  int32 target_position = 0,
+      target_size = target_scores->size();
+  for (; target_position + 1 < target_size; target_position++) {
+    int32 nontarget_size = nontarget_scores->size(),
+        nontarget_n = nontarget_size * target_position * 1.0 / target_size,
+        nontarget_position = nontarget_size - 1 - nontarget_n;
+    if (nontarget_position < 0)
+      nontarget_position = 0;
+    if ((*nontarget_scores)[nontarget_position] <
+        (*target_scores)[target_position])
+      break;
+  }
+  eer_threshold_ = (*target_scores)[target_position];
+  BaseFloat eer = target_position * 1.0 / target_size;
+  return eer;
+}
+
 } // namespace nnet3
 } // namespace kaldi
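Note on the pair convention used above: ComputeAccuracy treats the score at (i, i+1) for even i as a target pair and every other upper-triangle entry (i, j), i < j, as a nontarget pair; this presumes the egs place the two chunks of each same-source pair in adjacent rows. The standalone C++ sketch below (illustrative only, not part of the patch) runs the same double loop and counts both kinds of pairs:

#include <cstdio>

int main() {
  const int num_rows = 6;  // three same-source pairs of chunks
  int num_target = 0, num_nontarget = 0;
  for (int i = 0; i < num_rows; i++) {
    for (int j = 0; j < num_rows; j++) {
      if (i + 1 == j && i % 2 == 0)
        num_target++;        // (0,1), (2,3), (4,5)
      else if (i < j)
        num_nontarget++;     // the remaining upper-triangle pairs
    }
  }
  // For N rows this gives N/2 targets and N*(N-1)/2 - N/2 nontargets.
  std::printf("targets = %d, nontargets = %d\n", num_target, num_nontarget);
  return 0;
}

For num_rows = 6 this prints "targets = 3, nontargets = 12", which matches the total weight that ComputeAccuracy accumulates into acc_info_.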
diff --git a/src/xvector/nnet-xvector-diagnostics.h b/src/xvector/nnet-xvector-diagnostics.h
index 046088518b1..2b274efa784 100644
--- a/src/xvector/nnet-xvector-diagnostics.h
+++ b/src/xvector/nnet-xvector-diagnostics.h
@@ -71,6 +71,13 @@ class NnetXvectorComputeProb {
   ~NnetXvectorComputeProb();
  private:
   void ProcessOutputs(NnetComputer *computer);
+  // Returns the Equal Error Rate (EER) and sets eer_threshold_.
+  BaseFloat ComputeEer(std::vector<BaseFloat> *target_scores,
+                       std::vector<BaseFloat> *nontarget_scores);
+  // Computes the accuracy for this minibatch, using eer_threshold_.
+  void ComputeAccuracy(const CuMatrixBase<BaseFloat> &raw_scores,
+                       BaseFloat *tot_weight_out,
+                       BaseFloat *tot_accuracy_out);
 
   NnetComputeProbOptions config_;
   const Nnet &nnet_;
@@ -80,12 +87,12 @@ class NnetXvectorComputeProb {
   // this is only for diagnostics.
   int32 num_minibatches_processed_;
-
+  bool need_eer_threshold_;
+  BaseFloat eer_threshold_;
   unordered_map<std::string, SimpleObjectiveInfo, StringHasher> objf_info_;
-
+  unordered_map<std::string, SimpleObjectiveInfo, StringHasher> acc_info_;
 };
-
 } // namespace nnet3
 } // namespace kaldi
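Note on ComputeEer, declared above: it sorts both score lists in ascending order and advances a candidate threshold through the target scores until the fraction of targets below the threshold roughly matches the fraction of nontargets at or above it; that crossing point is the EER, and the threshold is cached in eer_threshold_. A self-contained sketch of the same search on made-up scores (illustrative only; the real implementation is in nnet-xvector-diagnostics.cc above):

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  // Made-up scores; higher means "more likely a target pair".
  std::vector<float> target = {0.9f, 1.5f, 2.0f, 2.5f};
  std::vector<float> nontarget = {-1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 1.2f};
  std::sort(target.begin(), target.end());
  std::sort(nontarget.begin(), nontarget.end());
  int tsize = target.size(), nsize = nontarget.size(), pos = 0;
  for (; pos + 1 < tsize; pos++) {
    // Nontarget index whose upper-tail fraction matches pos / tsize.
    int n = static_cast<int>(nsize * pos * 1.0 / tsize);
    int npos = nsize - 1 - n;
    if (npos < 0)
      npos = 0;
    if (nontarget[npos] < target[pos])
      break;
  }
  std::printf("threshold = %.2f, EER ~ %.2f\n",
              target[pos], pos * 1.0 / tsize);
  return 0;
}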
diff --git a/src/xvector/nnet-xvector-training.cc b/src/xvector/nnet-xvector-training.cc
index 7327af90d45..f7fb430ad2c 100644
--- a/src/xvector/nnet-xvector-training.cc
+++ b/src/xvector/nnet-xvector-training.cc
@@ -30,13 +30,13 @@ NnetXvectorTrainer::NnetXvectorTrainer(const NnetTrainerOptions &config,
     nnet_(nnet),
     compiler_(*nnet, config_.optimize_config),
     num_minibatches_processed_(0) {
-  if (config.zero_component_stats)
+  if (config_.zero_component_stats)
     ZeroComponentStats(nnet);
-  if (config.momentum == 0.0 && config.max_param_change == 0.0) {
+  if (config_.momentum == 0.0 && config_.max_param_change == 0.0) {
     delta_nnet_= NULL;
   } else {
-    KALDI_ASSERT(config.momentum >= 0.0 &&
-                 config.max_param_change >= 0.0);
+    KALDI_ASSERT(config_.momentum >= 0.0 &&
+                 config_.max_param_change >= 0.0);
     delta_nnet_ = nnet_->Copy();
     bool is_gradient = false;  // setting this to true would disable the
                                // natural-gradient updates.
@@ -130,6 +130,7 @@ void NnetXvectorTrainer::ProcessOutputs(NnetComputer *computer) {
                                  (supply_deriv ? &xvector_deriv : NULL),
                                  (supply_deriv ? &deriv_s : NULL),
                                  (supply_deriv ? &deriv_b : NULL),
+                                 NULL,  // raw pair scores are not needed in training.
                                  &tot_objf,
                                  &tot_weight);
@@ -246,7 +247,7 @@ void GetComputationRequestXvector(const Nnet &nnet,
   request->need_model_derivative = need_model_derivative;
   request->store_component_stats = store_component_stats;
-  // xvector-egs have multiple inputs(e.g. different inputs correspond
-  // to different chunks and no outputs.
+  // xvector-egs have multiple inputs (e.g. different inputs correspond
+  // to different chunks) and no outputs.
   for (size_t i = 0; i < eg.io.size(); i++) {
     const NnetIo &io = eg.io[i];
@@ -263,28 +264,36 @@ void GetComputationRequestXvector(const Nnet &nnet,
       IoSpecification &io_spec = dest.back();
       io_spec.name = name;
       io_spec.indexes = io.indexes;
-      io_spec.has_deriv = nnet.IsOutputNode(node_index) && need_model_derivative;
+      io_spec.has_deriv = false;
   }
 
   // We only need the output on frame t=0 for each n.
   int32 io_index_size = request->inputs[0].indexes.size(),
-      n_indx_size = 0;
-  std::vector<Index> output_indexes,
+      n_indx_size = 1e6, t_ind;
+  std::vector<Index> output_indexes,
       affine_output_indexes;
   affine_output_indexes.resize(1);
   affine_output_indexes[0].n = 0;
   affine_output_indexes[0].t = 0;
-  for (int32 indx = 0; indx < io_index_size; indx++)
-    if (request->inputs[0].indexes[indx].t == 0)
-      n_indx_size++;
+  std::map<int32, int32> n_indx_sizes;
+  for (int32 indx = 0; indx < io_index_size; indx++) {
+    t_ind = request->inputs[0].indexes[indx].t;
+    if (n_indx_sizes.count(t_ind) != 0)
+      n_indx_sizes[t_ind] += 1;
+    else
+      n_indx_sizes.insert(std::make_pair(t_ind, 1));
+  }
+  // Use the smallest per-offset count, so that the output request never
+  // asks for more distinct n values than some frame-offset provides.
+  std::map<int32, int32>::const_iterator iter;
+  for (iter = n_indx_sizes.begin(); iter != n_indx_sizes.end(); iter++)
+    n_indx_size = std::min(n_indx_size, iter->second);
   output_indexes.resize(n_indx_size);
   for (int32 indx = 0; indx < n_indx_size; indx++) {
     output_indexes[indx].n = indx;
     output_indexes[indx].t = 0;
   }
-
+
   // In order to generate computation request for output nodes,
   // we should find output nodes and add io_spec for each one.
   int32 num_nodes = nnet.NumNodes();
@@ -294,8 +303,8 @@ void GetComputationRequestXvector(const Nnet &nnet,
       dest.resize(dest.size() + 1);
       IoSpecification &io_spec = dest.back();
       io_spec.name = nnet.GetNodeName(node_index);
-      if (nnet.GetNodeName(node_index) == "s" ||
-          nnet.GetNodeName(node_index) == "b")
+      if (nnet.GetNodeName(node_index) == "s" ||
+          nnet.GetNodeName(node_index) == "b")
        io_spec.indexes = affine_output_indexes;
       else
        io_spec.indexes = output_indexes;
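Note on the index-counting change in GetComputationRequestXvector above: the old code sized the output request by counting only input indexes with t == 0, while the new code counts the entries for every frame offset t and takes the minimum count over offsets, which appears intended to keep the request valid for egs whose chunks are present at several frame shifts. A toy version of that counting (illustrative only; the data is made up):

#include <algorithm>
#include <cstdio>
#include <map>
#include <vector>

int main() {
  // Hypothetical t values of the input indexes: two frame shifts,
  // t=0 appears for 4 chunks, t=1 for only 3 chunks.
  std::vector<int> t_values = {0, 0, 0, 0, 1, 1, 1};
  std::map<int, int> counts;
  for (size_t i = 0; i < t_values.size(); i++)
    counts[t_values[i]]++;  // operator[] value-initializes missing keys to 0
  int n_indx_size = 1000000;
  for (std::map<int, int>::const_iterator it = counts.begin();
       it != counts.end(); ++it)
    n_indx_size = std::min(n_indx_size, it->second);
  std::printf("n_indx_size = %d\n", n_indx_size);  // prints 3
  return 0;
}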
diff --git a/src/xvector/xvector.cc b/src/xvector/xvector.cc
index 604d70e9c14..06e9055acf9 100644
--- a/src/xvector/xvector.cc
+++ b/src/xvector/xvector.cc
@@ -26,6 +26,7 @@ void ComputeXvectorObjfAndDeriv(
     const CuSpMatrix<BaseFloat> &S, BaseFloat b,
     CuMatrixBase<BaseFloat> *deriv_xvector,
     CuVector<BaseFloat> *deriv_S,
     BaseFloat *deriv_b,
+    CuMatrixBase<BaseFloat> *raw_scores,
     BaseFloat *tot_objf,
     BaseFloat *tot_weight) {
@@ -40,6 +41,8 @@ void ComputeXvectorObjfAndDeriv(
     KALDI_ASSERT(deriv_xvector->NumCols() == xvector_dim);
     KALDI_ASSERT(deriv_xvector->NumRows() == N);
     KALDI_ASSERT(deriv_S->Dim() == S_dim);
+    deriv_xvector->SetZero();
+    deriv_S->SetZero();
   }
 
   CuMatrix<BaseFloat> S_tmp(S),
@@ -61,6 +64,8 @@
   scores.AddMat(-1.0, R, kTrans);
   scores.AddMat(-1.0, R, kNoTrans);
   scores.Add(b);
+  if (raw_scores != NULL)
+    raw_scores->CopyFromMat(scores);
   cu::ComputeXvectorObjfFromScores(scores, &objf_terms, &objf_deriv_terms);
   CuVector<BaseFloat> objf_terms_vec(N);
diff --git a/src/xvector/xvector.h b/src/xvector/xvector.h
index 75083533acd..9ddc2d674fd 100644
--- a/src/xvector/xvector.h
+++ b/src/xvector/xvector.h
@@ -69,6 +69,7 @@ namespace kaldi {
     CuMatrixBase<BaseFloat> *deriv_xvector,
     CuVector<BaseFloat> *deriv_S,
     BaseFloat *deriv_b,
+    CuMatrixBase<BaseFloat> *raw_scores,
     BaseFloat *tot_objf,
     BaseFloat *tot_weight);
 } // namespace kaldi
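Note on raw_scores, the parameter added to ComputeXvectorObjfAndDeriv above: when non-NULL it receives a copy of the N x N pairwise score matrix just before the objective is evaluated, and this is what the diagnostics use for the EER and accuracy computations. The fragment shows only the last steps of building the scores (subtracting R and its transpose, then adding b), so the following is a hedged reading rather than a transcription of the patch: assuming the unshown lines form the Gram matrix of the xvectors and row i of R holds the quadratic term x_i^T S x_i, each entry has the form s(x_i, x_j) = x_i . x_j - x_i^T S x_i - x_j^T S x_j + b. A toy computation of one such score (all values made up):

#include <cstdio>

// q(x) = x^T S x for a symmetric 2x2 matrix S.
static float Quad(const float S[2][2], const float x[2]) {
  return S[0][0]*x[0]*x[0] + 2.0f*S[0][1]*x[0]*x[1] + S[1][1]*x[1]*x[1];
}

int main() {
  // Two toy 2-dim xvectors, a symmetric S, and an offset b.
  const float x0[2] = {1.0f, 0.5f}, x1[2] = {0.8f, 0.6f};
  const float S[2][2] = {{0.1f, 0.02f}, {0.02f, 0.1f}};
  const float b = 0.2f;
  float dot = x0[0]*x1[0] + x0[1]*x1[1];
  float score = dot - Quad(S, x0) - Quad(S, x1) + b;
  std::printf("s(x0, x1) = %.4f\n", score);
  return 0;
}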