From 6bd441bee2a0c1e2f9ee7426db06c069b7531123 Mon Sep 17 00:00:00 2001
From: Hang Lyu <hlyu@a11.clsp.jhu.edu>
Date: Tue, 29 Nov 2016 02:23:14 -0500
Subject: [PATCH 01/23] add a function--ApplyAddAdditiveNoise

---
 .../nnet3-xvector-signal-perturb-egs.cc       | 108 ++++++++++++++++--
 1 file changed, 101 insertions(+), 7 deletions(-)
diff --git a/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc b/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc
index f78c9c6a03a..9b154490d8f 100644
--- a/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc
+++ b/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc
@@ -51,8 +51,8 @@ void ApplyPerturbation(XvectorPerturbOptions opts,
     // randomly generate an zero-phase FIR filter with no zeros.
     // In future, we can select trucated part of room impluse response
     // and convolve it with input_egs.
-    perturb_xvector.ComputeAndApplyRandDistortion(shifted_egs,
-                                  &rand_distort_shifted_egs);
+    ////perturb_xvector.ComputeAndApplyRandDistortion(shifted_egs,
+    ////                              &rand_distort_shifted_egs);
   }
 
   if (noise_egs) { 
@@ -73,16 +73,71 @@ void ApplyPerturbation(XvectorPerturbOptions opts,
   }
   // Perturb speed of signal egs
   Matrix<BaseFloat> warped_distorted_shifted_egs(rand_distort_shifted_egs);
-  if (opts.max_time_stretch != 0.0) 
-    perturb_xvector.TimeStretch(rand_distort_shifted_egs, 
-                                &warped_distorted_shifted_egs);
+  ////if (opts.max_time_stretch != 0.0) 
+  ////  perturb_xvector.TimeStretch(rand_distort_shifted_egs, 
+  ////                              &warped_distorted_shifted_egs);
    
   // If nagation is true, the sample values are randomly negated
   // with some probability.
-  if (opts.negation) {
+  ////if (opts.negation) {
    
+  ////}
+}
+
+// add
+// This function add the noise to the orginial signal. We should not normalize 
+// the signal level of the orginial signal. According to SNR, we rescale the noise
+// and add it. So that the perturbed signal is created. 
+void ApplyAddAdditiveNoise(const int32 &SNR,
+                           const Matrix<BaseFloat> &input_eg,
+                           const Matrix<BaseFloat> &noise_eg,
+                           Matrix<BaseFloat> *perturb_eg) {
+  // In the version, we ask the noise_cols >= input_cols. If mfcc, the cols are equal.
+  // If raw data, we ask the noise_cols > input_cols.
+  int32 input_rows = input_eg.NumRows(), input_cols = input_eg.NumCols();  
+  KALDI_ASSERT(noise_eg.NumCols() >= input_cols);
+
+  // According to the rows of noise_eg, form the noise_mat
+  // repeat the noise_eg blocks to have a new block which is longer than input_eg
+  Matrix<BaseFloat> noise_mat;
+  if (noise_eg.NumRows() < input_rows) {
+    int32 repeat_times = (input_rows / noise_eg.NumRows()) + 1;
+    noise_mat.Resize(noise_eg.NumRows() * repeat_times, noise_eg.NumCols());
+    for (int32 i = 0; i < repeat_times; ++i) {
+      noise_mat.Range(i*noise_eg.NumRows(), noise_eg.NumRows(), 
+                      0, noise_eg.NumCols()).CopyFromMat(noise_eg);
+    }
+  } else {
+    noise_mat.Resize(noise_eg.NumRows(), noise_eg.NumCols());
+    noise_mat.CopyFromMat(noise_eg);
   }
+
+  // select the noise range
+  int32 noise_rows = noise_mat.NumRows(), noise_cols = noise_mat.NumCols();
+  int32 start_row_ind = RandInt(0, noise_rows - input_rows),
+        start_col_ind = RandInt(0, noise_cols - input_cols); 
+  Matrix<BaseFloat> selected_noise_mat(input_rows, input_cols);
+  selected_noise_mat.AddMat(1.0, noise_mat.Range(start_row_ind, input_rows,
+                                                  start_col_ind, input_cols));
+  // compute the energy of noise and input
+  Matrix<BaseFloat> input_energy_mat(input_rows, input_cols);
+  input_energy_mat.AddMatMatElements(1.0, input_eg, input_eg, 1.0);
+  double input_energy = input_energy_mat.Sum();
+  Matrix<BaseFloat> noise_energy_mat(input_rows, input_cols);
+  noise_energy_mat.AddMatMatElements(1.0, selected_noise_mat, selected_noise_mat, 1.0);
+  double noise_energy = noise_energy_mat.Sum();
+
+  // In Energy domain, SNR=20log10(S/N). 
+  // 10^(SNR/20) = input_energy / (scale^2 * noise_energy)
+  double scale = input_energy / noise_energy / (pow(10,SNR/20));
+  scale = sqrt(scale);
+  
+  // Add noise mat to input_eg mat
+  perturb_eg->Resize(input_rows, input_cols);
+  perturb_eg->CopyFromMat(input_eg);
+  perturb_eg->AddMat(scale, selected_noise_mat);
 }
+// add-end
 
 } // end of namespace nnet3
 } // end of namespace kaldi
@@ -108,6 +163,14 @@ int main(int argc, char *argv[]) {
     XvectorPerturbOptions perturb_opts;
     perturb_opts.Register(&po);
 
+    // add
+    std::string add_noise_rspecifier;
+    po.Register("add-noise", &add_noise_rspecifier, "specify a file contains some noise egs");
+    int32 snr;
+    po.Register("SNR",&snr,"specify a Signal to Noise Ration.We will scale the noise according \
+                to the original signal and SNR. Normally, it's a non-zero number between -30 and 30");
+    // add-end
+
     po.Read(argc, argv);
     if (po.NumArgs() != 2) {
       po.PrintUsage();
@@ -121,6 +184,20 @@ int main(int argc, char *argv[]) {
      
     NnetExampleWriter example_writer(examples_wspecifier);
 
+    // add
+    // count the number of noise examples and record the key
+    std::vector<std::string> list_noise_egs;
+    SequentialNnetExampleReader noise_seq_reader(add_noise_rspecifier);
+    for (; !noise_seq_reader.Done(); noise_seq_reader.Next()) {
+      std::string key = noise_seq_reader.Key();
+      list_noise_egs.push_back(key);
+    }
+    noise_seq_reader.Close();
+    int32 num_noise_egs = list_noise_egs.size();
+    // initial a RandomAccessTableReader for noise egs
+    RandomAccessNnetExampleReader noise_random_reader(add_noise_rspecifier);
+    // add-end
+
     int64 num_read = 0, num_written = 0;
 
     Matrix<BaseFloat> *noise_mat = NULL;
@@ -141,7 +218,24 @@ int main(int argc, char *argv[]) {
       Matrix<BaseFloat> perturb_eg_mat, 
         input_eg_mat;
       input_eg_io.features.CopyToMat(&input_eg_mat);
-      ApplyPerturbation(perturb_opts, input_eg_mat, noise_mat, &perturb_eg_mat);
+      
+      // add
+      if (!add_noise_rspecifier.empty()) {
+        // random choose a noise example
+        int32 index_noise_egs = RandInt(0, num_noise_egs - 1);
+        std::string key_noise_egs = list_noise_egs[index_noise_egs];
+        const NnetExample &noise_eg = noise_random_reader.Value(key_noise_egs);
+        const NnetIo &noise_eg_io = noise_eg.io[0];
+        
+        Matrix<BaseFloat> noise_eg_mat;
+        noise_eg_io.features.CopyToMat(&noise_eg_mat);
+
+        // deal with add noise
+        ApplyAddAdditiveNoise(snr, input_eg_mat, noise_eg_mat, &perturb_eg_mat);
+      } else {
+        ApplyPerturbation(perturb_opts, input_eg_mat, noise_mat, &perturb_eg_mat);
+      }
+      // add-end
       perturb_eg->io.resize(1.0);
       perturb_eg->io[0].features.SwapFullMatrix(&perturb_eg_mat);
       example_writer.Write(key, *perturb_eg);

From 0e73c48793276ab25ed80317541ffb48b57ada08 Mon Sep 17 00:00:00 2001
From: Hang Lyu <hlyu@a11.clsp.jhu.edu>
Date: Tue, 29 Nov 2016 17:10:04 -0500
Subject: [PATCH 02/23] fix the noise_eg.NumCols() == input_eg.NumCols()

---
 src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc b/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc
index 9b154490d8f..875051a1cc8 100644
--- a/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc
+++ b/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc
@@ -92,10 +92,9 @@ void ApplyAddAdditiveNoise(const int32 &SNR,
                            const Matrix<BaseFloat> &input_eg,
                            const Matrix<BaseFloat> &noise_eg,
                            Matrix<BaseFloat> *perturb_eg) {
-  // In the version, we ask the noise_cols >= input_cols. If mfcc, the cols are equal.
-  // If raw data, we ask the noise_cols > input_cols.
+  // In the version, we ask the noise_cols == input_cols.
   int32 input_rows = input_eg.NumRows(), input_cols = input_eg.NumCols();  
-  KALDI_ASSERT(noise_eg.NumCols() >= input_cols);
+  KALDI_ASSERT(noise_eg.NumCols() == input_cols);
 
   // According to the rows of noise_eg, form the noise_mat
   // repeat the noise_eg blocks to have a new block which is longer than input_eg
@@ -113,12 +112,11 @@ void ApplyAddAdditiveNoise(const int32 &SNR,
   }
 
   // select the noise range
-  int32 noise_rows = noise_mat.NumRows(), noise_cols = noise_mat.NumCols();
-  int32 start_row_ind = RandInt(0, noise_rows - input_rows),
-        start_col_ind = RandInt(0, noise_cols - input_cols); 
+  int32 noise_rows = noise_mat.NumRows();
+  int32 start_row_ind = RandInt(0, noise_rows - input_rows);
   Matrix<BaseFloat> selected_noise_mat(input_rows, input_cols);
   selected_noise_mat.AddMat(1.0, noise_mat.Range(start_row_ind, input_rows,
-                                                  start_col_ind, input_cols));
+                                                 0, input_cols));
   // compute the energy of noise and input
   Matrix<BaseFloat> input_energy_mat(input_rows, input_cols);
   input_energy_mat.AddMatMatElements(1.0, input_eg, input_eg, 1.0);

From f2e3119f4fee9c7f56ec6e57e05182599f0c02ab Mon Sep 17 00:00:00 2001
From: Hang Lyu <hlyu@a11.clsp.jhu.edu>
Date: Sat, 3 Dec 2016 23:49:22 -0500
Subject: [PATCH 03/23] modify the design style

---
 src/feat/signal-distort.cc                    | 88 +++++++++++++++++
 src/feat/signal-distort.h                     | 17 +++-
 .../nnet3-xvector-signal-perturb-egs.cc       | 96 +++----------------
 3 files changed, 115 insertions(+), 86 deletions(-)

diff --git a/src/feat/signal-distort.cc b/src/feat/signal-distort.cc
index 788860ba769..25744f788a6 100644
--- a/src/feat/signal-distort.cc
+++ b/src/feat/signal-distort.cc
@@ -72,5 +72,93 @@ void TimeStretch(const MatrixBase<BaseFloat> &input_egs,
   perturb_egs->CopyFromMat(out_mat);
 }
 
+// This function adds the noise to the original signal. We do not normalize
+// the level of the original signal. According to the SNR, we rescale the noise
+// and add it. So that the perturbed signal is created. 
+void PerturbXvectorSignal::ApplyAdditiveNoise(const MatrixBase<BaseFloat> &input_eg,
+                                              const Matrix<BaseFloat> &noise_eg,
+                                              const int32 &SNR,
+                                              Matrix<BaseFloat> *perturb_eg) {
+  // In the version, we ask the noise_cols == input_cols.
+  int32 input_rows = input_eg.NumRows(), input_cols = input_eg.NumCols();  
+  KALDI_ASSERT(noise_eg.NumCols() == input_cols);
 
+  // Select an input-sized block of noise rows starting at a random row of
+  // noise_eg. noise_eg is read in place and never copied as a whole, because
+  // it is normally much larger than input_eg.
+  const int32 noise_rows = noise_eg.NumRows();
+  Matrix<BaseFloat> selected_noise_mat;
+  selected_noise_mat.Resize(input_rows, input_cols);
+
+  if (noise_rows < input_rows) {
+    // The noise is shorter than the input: repeat its rows cyclically. The
+    // start row is drawn from all noise rows, because RandInt() asserts
+    // max >= min and noise_rows - input_rows is negative here.
+    KALDI_ASSERT(noise_rows > 0);
+    const int32 start_row_ind = RandInt(0, noise_rows - 1);
+    // std::vector instead of a variable-length array, which is not standard C++.
+    std::vector<int32> indices(input_rows);
+    for (int32 i = 0; i < input_rows; ++i)
+      indices[i] = (start_row_ind + i) % noise_rows;
+    selected_noise_mat.CopyRows(noise_eg, &(indices[0]));
+  } else {
+    const int32 start_row_ind = RandInt(0, noise_rows - input_rows);
+    selected_noise_mat.AddMat(1.0, noise_eg.Range(start_row_ind, input_rows,
+                                                  0, input_cols));
+  }
+
+  // compute the energy of noise and input
+  Matrix<BaseFloat> input_energy_mat(input_rows, input_cols);
+  input_energy_mat.AddMatMatElements(1.0, input_eg, input_eg, 0.0);
+  double input_energy = input_energy_mat.Sum();
+  Matrix<BaseFloat> noise_energy_mat(input_rows, input_cols);
+  noise_energy_mat.AddMatMatElements(1.0, selected_noise_mat, selected_noise_mat, 0.0);
+  double noise_energy = noise_energy_mat.Sum();
+
+  // In Energy domain, SNR=20log10(S/N). 
+  // 10^(SNR/20) = input_energy / (scale^2 * noise_energy)
+  double scale = input_energy / noise_energy / (pow(10,SNR/20));
+  scale = sqrt(scale);
+  
+  // Add noise mat to input_eg mat
+  perturb_eg->Resize(input_rows, input_cols);
+  perturb_eg->CopyFromMat(input_eg);
+  perturb_eg->AddMat(scale, selected_noise_mat);
+}
+
+void PerturbXvectorSignal::ApplyDistortion(const MatrixBase<BaseFloat> &input_egs,
+                                           Matrix<BaseFloat> *perturb_egs) {
+  if (!opts_.add_noise_rspecifier.empty()) { // deal with the add_noise ark situdation
+    // count the number of noise examples and record the key
+    std::vector<std::string> list_noise_egs;
+    list_noise_egs.clear();
+    kaldi::nnet3::SequentialNnetExampleReader noise_seq_reader(opts_.add_noise_rspecifier);
+    for (; !noise_seq_reader.Done(); noise_seq_reader.Next()) {
+      std::string key = noise_seq_reader.Key();
+      list_noise_egs.push_back(key);
+    }
+    noise_seq_reader.Close();
+    
+    // random choose a noise_eg and use it.
+    int32 num_noise_egs = list_noise_egs.size();
+    int32 index_noise_egs = RandInt(0, num_noise_egs - 1);
+    std::string key_noise_egs = list_noise_egs[index_noise_egs];
+
+    kaldi::nnet3::RandomAccessNnetExampleReader noise_random_reader(opts_.add_noise_rspecifier);
+    const kaldi::nnet3::NnetExample &noise_eg = noise_random_reader.Value(key_noise_egs);
+    const kaldi::nnet3::NnetIo &noise_eg_io = noise_eg.io[0];
+    Matrix<BaseFloat> noise_eg_mat;
+    noise_eg_io.features.CopyToMat(&noise_eg_mat);
+    int32 SNR = opts_.snr;
+
+    // conduct ApplyAdditiveNoise
+    ApplyAdditiveNoise(input_egs, noise_eg_mat, SNR, perturb_egs);
+
+    // conduct others
+    // TODO
+  } else { // deal with the opts_.noise_egs situation
+    // TODO
+  }
+}
+// add-end
 } // end of namespace kaldi
diff --git a/src/feat/signal-distort.h b/src/feat/signal-distort.h
index b3faad96554..d6c0e3e84ff 100644
--- a/src/feat/signal-distort.h
+++ b/src/feat/signal-distort.h
@@ -32,6 +32,7 @@
 #include "feat/resample.h"
 #include "matrix/matrix-functions.h"
 #include "cudamatrix/cu-matrix.h"
+#include "nnet3/nnet-example.h"
 
 namespace kaldi {
 
@@ -43,11 +44,15 @@ struct XvectorPerturbOptions {
   int32 negation_prop; 
   bool rand_distort;
   std::string noise_egs;
+  std::string add_noise_rspecifier;
+  int32 snr;
+
   XvectorPerturbOptions(): max_shift(0.2),
                            max_time_stretch(0.2),
                            frame_dim(80),
                            negation_prop(0.0),
-                           rand_distort(false) { }
+                           rand_distort(false),
+                           snr(10) { }
   void Register(OptionsItf *opts) { 
     opts->Register("max-shift", &max_shift, "Maximum random shift relative"
                 "to frame length applied to egs.");
@@ -59,6 +64,10 @@ struct XvectorPerturbOptions {
     opts->Register("noise-egs", &noise_egs, "If supplied, the additive noise is added to input signal.");
     opts->Register("rand_distort", &rand_distort, "If true, the signal is slightly changes"
                    "using some designed FIR filter with no zeros.");
+    opts->Register("add-noise", &add_noise_rspecifier, "specify a file contains some noise egs");
+    opts->Register("SNR",&snr,"specify a Signal to Noise Ration. We will scale the noise according"
+                "to the original signal and SNR. Normally, it's a non-zero number between -30 and 30"
+                "default=10");
   }
 };
 
@@ -70,8 +79,14 @@ class PerturbXvectorSignal {
                        Matrix<BaseFloat> *perturb_egs);
  private:
   XvectorPerturbOptions opts_;
+
+  void ApplyAdditiveNoise(const MatrixBase<BaseFloat> &input_eg,
+                          const Matrix<BaseFloat> &noise_eg,
+                          const int32 &SNR,
+                          Matrix<BaseFloat> *perturb_eg);
 };
 
+
 // randomly disturb the input signal using a band-pass filter with no zeros.
 void ComputeAndApplyRandDistortion(const MatrixBase<BaseFloat> &input_egs,
                                    Matrix<BaseFloat> *perturb_egs);
diff --git a/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc b/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc
index 875051a1cc8..71c297ea18a 100644
--- a/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc
+++ b/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc
@@ -85,55 +85,14 @@ void ApplyPerturbation(XvectorPerturbOptions opts,
 }
 
 // add
-// This function add the noise to the orginial signal. We should not normalize 
-// the signal level of the orginial signal. According to SNR, we rescale the noise
-// and add it. So that the perturbed signal is created. 
-void ApplyAddAdditiveNoise(const int32 &SNR,
-                           const Matrix<BaseFloat> &input_eg,
-                           const Matrix<BaseFloat> &noise_eg,
-                           Matrix<BaseFloat> *perturb_eg) {
-  // In the version, we ask the noise_cols == input_cols.
-  int32 input_rows = input_eg.NumRows(), input_cols = input_eg.NumCols();  
-  KALDI_ASSERT(noise_eg.NumCols() == input_cols);
-
-  // According to the rows of noise_eg, form the noise_mat
-  // repeat the noise_eg blocks to have a new block which is longer than input_eg
-  Matrix<BaseFloat> noise_mat;
-  if (noise_eg.NumRows() < input_rows) {
-    int32 repeat_times = (input_rows / noise_eg.NumRows()) + 1;
-    noise_mat.Resize(noise_eg.NumRows() * repeat_times, noise_eg.NumCols());
-    for (int32 i = 0; i < repeat_times; ++i) {
-      noise_mat.Range(i*noise_eg.NumRows(), noise_eg.NumRows(), 
-                      0, noise_eg.NumCols()).CopyFromMat(noise_eg);
-    }
-  } else {
-    noise_mat.Resize(noise_eg.NumRows(), noise_eg.NumCols());
-    noise_mat.CopyFromMat(noise_eg);
-  }
-
-  // select the noise range
-  int32 noise_rows = noise_mat.NumRows();
-  int32 start_row_ind = RandInt(0, noise_rows - input_rows);
-  Matrix<BaseFloat> selected_noise_mat(input_rows, input_cols);
-  selected_noise_mat.AddMat(1.0, noise_mat.Range(start_row_ind, input_rows,
-                                                 0, input_cols));
-  // compute the energy of noise and input
-  Matrix<BaseFloat> input_energy_mat(input_rows, input_cols);
-  input_energy_mat.AddMatMatElements(1.0, input_eg, input_eg, 1.0);
-  double input_energy = input_energy_mat.Sum();
-  Matrix<BaseFloat> noise_energy_mat(input_rows, input_cols);
-  noise_energy_mat.AddMatMatElements(1.0, selected_noise_mat, selected_noise_mat, 1.0);
-  double noise_energy = noise_energy_mat.Sum();
-
-  // In Energy domain, SNR=20log10(S/N). 
-  // 10^(SNR/20) = input_energy / (scale^2 * noise_energy)
-  double scale = input_energy / noise_energy / (pow(10,SNR/20));
-  scale = sqrt(scale);
-  
-  // Add noise mat to input_eg mat
-  perturb_eg->Resize(input_rows, input_cols);
-  perturb_eg->CopyFromMat(input_eg);
-  perturb_eg->AddMat(scale, selected_noise_mat);
+// This function is a entrance. It calls ApplyDistortion to apply different
+// type of distortions on input.
+void PerturbExample(XvectorPerturbOptions opts,
+                    const Matrix<BaseFloat> &input_egs,
+                    Matrix<BaseFloat> *perturb_egs) {
+  //new a PerturbXvectorSignal object and call ApplyDistortion
+  PerturbXvectorSignal perturb_xvector(opts);
+  perturb_xvector.ApplyDistortion(input_egs, perturb_egs);
 }
 // add-end
 
@@ -161,14 +120,6 @@ int main(int argc, char *argv[]) {
     XvectorPerturbOptions perturb_opts;
     perturb_opts.Register(&po);
 
-    // add
-    std::string add_noise_rspecifier;
-    po.Register("add-noise", &add_noise_rspecifier, "specify a file contains some noise egs");
-    int32 snr;
-    po.Register("SNR",&snr,"specify a Signal to Noise Ration.We will scale the noise according \
-                to the original signal and SNR. Normally, it's a non-zero number between -30 and 30");
-    // add-end
-
     po.Read(argc, argv);
     if (po.NumArgs() != 2) {
       po.PrintUsage();
@@ -182,20 +133,6 @@ int main(int argc, char *argv[]) {
      
     NnetExampleWriter example_writer(examples_wspecifier);
 
-    // add
-    // count the number of noise examples and record the key
-    std::vector<std::string> list_noise_egs;
-    SequentialNnetExampleReader noise_seq_reader(add_noise_rspecifier);
-    for (; !noise_seq_reader.Done(); noise_seq_reader.Next()) {
-      std::string key = noise_seq_reader.Key();
-      list_noise_egs.push_back(key);
-    }
-    noise_seq_reader.Close();
-    int32 num_noise_egs = list_noise_egs.size();
-    // initial a RandomAccessTableReader for noise egs
-    RandomAccessNnetExampleReader noise_random_reader(add_noise_rspecifier);
-    // add-end
-
     int64 num_read = 0, num_written = 0;
 
     Matrix<BaseFloat> *noise_mat = NULL;
@@ -215,21 +152,10 @@ int main(int argc, char *argv[]) {
       NnetExample *perturb_eg = new NnetExample();
       Matrix<BaseFloat> perturb_eg_mat, 
         input_eg_mat;
-      input_eg_io.features.CopyToMat(&input_eg_mat);
-      
+      input_eg_io.features.CopyToMat(&input_eg_mat);      
       // add
-      if (!add_noise_rspecifier.empty()) {
-        // random choose a noise example
-        int32 index_noise_egs = RandInt(0, num_noise_egs - 1);
-        std::string key_noise_egs = list_noise_egs[index_noise_egs];
-        const NnetExample &noise_eg = noise_random_reader.Value(key_noise_egs);
-        const NnetIo &noise_eg_io = noise_eg.io[0];
-        
-        Matrix<BaseFloat> noise_eg_mat;
-        noise_eg_io.features.CopyToMat(&noise_eg_mat);
-
-        // deal with add noise
-        ApplyAddAdditiveNoise(snr, input_eg_mat, noise_eg_mat, &perturb_eg_mat);
+      if (!perturb_opts.add_noise_rspecifier.empty()) {
+        PerturbExample(perturb_opts, input_eg_mat, &perturb_eg_mat);
       } else {
         ApplyPerturbation(perturb_opts, input_eg_mat, noise_mat, &perturb_eg_mat);
       }

From e82966f9e41e462ec3b4e9b96687f2c3b8367aec Mon Sep 17 00:00:00 2001
From: Hang Lyu <hlyu@a11.clsp.jhu.edu>
Date: Tue, 6 Dec 2016 00:18:09 -0500
Subject: [PATCH 04/23] modify the design style of ApplyAdditiveNoise

---
 src/feat/signal-distort.cc                    | 52 +++++++------------
 src/feat/signal-distort.h                     | 22 +++++---
 .../nnet3-xvector-signal-perturb-egs.cc       | 40 +++++++++-----
 3 files changed, 63 insertions(+), 51 deletions(-)

diff --git a/src/feat/signal-distort.cc b/src/feat/signal-distort.cc
index 25744f788a6..c00acb8c5f7 100644
--- a/src/feat/signal-distort.cc
+++ b/src/feat/signal-distort.cc
@@ -77,8 +77,7 @@ void TimeStretch(const MatrixBase<BaseFloat> &input_egs,
 // and add it. So that the perturbed signal is created. 
 void PerturbXvectorSignal::ApplyAdditiveNoise(const MatrixBase<BaseFloat> &input_eg,
                                               const Matrix<BaseFloat> &noise_eg,
-                                              const int32 &SNR,
-                                              Matrix<BaseFloat> *perturb_eg) {
+                                              Matrix<BaseFloat> *perturbed_eg) {
   // In the version, we ask the noise_cols == input_cols.
   int32 input_rows = input_eg.NumRows(), input_cols = input_eg.NumCols();  
   KALDI_ASSERT(noise_eg.NumCols() == input_cols);
@@ -117,48 +116,37 @@ void PerturbXvectorSignal::ApplyAdditiveNoise(const MatrixBase<BaseFloat> &input
 
   // In Energy domain, SNR=20log10(S/N). 
   // 10^(SNR/20) = input_energy / (scale^2 * noise_energy)
-  double scale = input_energy / noise_energy / (pow(10,SNR/20));
+  double scale = input_energy / noise_energy / (pow(10,opts_.snr/20));
   scale = sqrt(scale);
   
   // Add noise mat to input_eg mat
-  perturb_eg->Resize(input_rows, input_cols);
-  perturb_eg->CopyFromMat(input_eg);
-  perturb_eg->AddMat(scale, selected_noise_mat);
+  perturbed_eg->Resize(input_rows, input_cols);
+  perturbed_eg->CopyFromMat(input_eg);
+  perturbed_eg->AddMat(scale, selected_noise_mat);
 }
 
 void PerturbXvectorSignal::ApplyDistortion(const MatrixBase<BaseFloat> &input_egs,
                                            Matrix<BaseFloat> *perturb_egs) {
-  if (!opts_.add_noise_rspecifier.empty()) { // deal with the add_noise ark situdation
-    // count the number of noise examples and record the key
-    std::vector<std::string> list_noise_egs;
-    list_noise_egs.clear();
-    kaldi::nnet3::SequentialNnetExampleReader noise_seq_reader(opts_.add_noise_rspecifier);
-    for (; !noise_seq_reader.Done(); noise_seq_reader.Next()) {
-      std::string key = noise_seq_reader.Key();
-      list_noise_egs.push_back(key);
-    }
-    noise_seq_reader.Close();
-    
-    // random choose a noise_eg and use it.
-    int32 num_noise_egs = list_noise_egs.size();
-    int32 index_noise_egs = RandInt(0, num_noise_egs - 1);
-    std::string key_noise_egs = list_noise_egs[index_noise_egs];
-
-    kaldi::nnet3::RandomAccessNnetExampleReader noise_random_reader(opts_.add_noise_rspecifier);
-    const kaldi::nnet3::NnetExample &noise_eg = noise_random_reader.Value(key_noise_egs);
-    const kaldi::nnet3::NnetIo &noise_eg_io = noise_eg.io[0];
-    Matrix<BaseFloat> noise_eg_mat;
-    noise_eg_io.features.CopyToMat(&noise_eg_mat);
-    int32 SNR = opts_.snr;
-
     // conduct ApplyAdditiveNoise
-    ApplyAdditiveNoise(input_egs, noise_eg_mat, SNR, perturb_egs);
-
+  if (!opts_.add_noise_rspecifier.empty()) {
+    ApplyAdditiveNoise(input_egs, *noise_egs_, perturb_egs);
     // conduct others
     // TODO
   } else { // deal with the opts_.noise_egs situation
     // TODO
   }
 }
-// add-end
+
+// This function is an entry point. It calls ApplyDistortion to apply different
+// type of distortions on input.
+void PerturbExample(XvectorPerturbOptions opts,
+                    const Matrix<BaseFloat> &input_egs,
+                    const Matrix<BaseFloat> &noise_egs,
+                    Matrix<BaseFloat> *perturbed_egs) {
+  //new a PerturbXvectorSignal object and call ApplyDistortion
+  PerturbXvectorSignal perturb_egs(opts);
+  perturb_egs.SetNoiseEgs(noise_egs);
+  perturb_egs.ApplyDistortion(input_egs, perturbed_egs);
+}
+
 } // end of namespace kaldi
diff --git a/src/feat/signal-distort.h b/src/feat/signal-distort.h
index d6c0e3e84ff..af06c235ca7 100644
--- a/src/feat/signal-distort.h
+++ b/src/feat/signal-distort.h
@@ -32,7 +32,6 @@
 #include "feat/resample.h"
 #include "matrix/matrix-functions.h"
 #include "cudamatrix/cu-matrix.h"
-#include "nnet3/nnet-example.h"
 
 namespace kaldi {
 
@@ -45,7 +44,7 @@ struct XvectorPerturbOptions {
   bool rand_distort;
   std::string noise_egs;
   std::string add_noise_rspecifier;
-  int32 snr;
+  BaseFloat snr;
 
   XvectorPerturbOptions(): max_shift(0.2),
                            max_time_stretch(0.2),
@@ -74,16 +73,22 @@ struct XvectorPerturbOptions {
 class PerturbXvectorSignal {
  public:
   PerturbXvectorSignal(XvectorPerturbOptions opts): opts_(opts) { };
-
+  inline void SetNoiseEgs(const Matrix<BaseFloat> &noise_egs) {
+    noise_egs_ = &noise_egs;
+  }
   void ApplyDistortion(const MatrixBase<BaseFloat> &input_egs,
                        Matrix<BaseFloat> *perturb_egs);
  private:
   XvectorPerturbOptions opts_;
-
+  // Pointer to the noise matrix given to SetNoiseEgs(); only its address is stored
+  // and the constructor leaves it unset. It could become a vector of pointers.
+  const Matrix<BaseFloat> *noise_egs_;
+  // The noise is passed in as the noise_eg parameter rather than read from
+  // noise_egs_, so that a caller could choose one of several noise matrices if
+  // noise_egs_ is ever expanded to a vector of pointers.
   void ApplyAdditiveNoise(const MatrixBase<BaseFloat> &input_eg,
                           const Matrix<BaseFloat> &noise_eg,
-                          const int32 &SNR,
-                          Matrix<BaseFloat> *perturb_eg);
+                          Matrix<BaseFloat> *perturbed_eg);
 };
 
 
@@ -104,5 +109,10 @@ void TimeStretch(const MatrixBase<BaseFloat> &input_egs,
                  BaseFloat max_time_stretch,
                  Matrix<BaseFloat> *perturb_egs);
 
+void PerturbExample(XvectorPerturbOptions opts,
+                    const Matrix<BaseFloat> &input_egs,
+                    const Matrix<BaseFloat> &noise_egs,
+                    Matrix<BaseFloat> *perturbed_egs);
+
 } // end of namespace kaldi
 #endif // KALDI_SIGNAL_DISTORT_H_
diff --git a/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc b/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc
index 71c297ea18a..6ebfd8f9e4f 100644
--- a/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc
+++ b/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc
@@ -84,18 +84,6 @@ void ApplyPerturbation(XvectorPerturbOptions opts,
   ////}
 }
 
-// add
-// This function is a entrance. It calls ApplyDistortion to apply different
-// type of distortions on input.
-void PerturbExample(XvectorPerturbOptions opts,
-                    const Matrix<BaseFloat> &input_egs,
-                    Matrix<BaseFloat> *perturb_egs) {
-  //new a PerturbXvectorSignal object and call ApplyDistortion
-  PerturbXvectorSignal perturb_xvector(opts);
-  perturb_xvector.ApplyDistortion(input_egs, perturb_egs);
-}
-// add-end
-
 } // end of namespace nnet3
 } // end of namespace kaldi
 
@@ -145,6 +133,19 @@ int main(int argc, char *argv[]) {
        
     }
 
+    // if we have the add_noise option, we need to record the keys of noise_egs.
+    // It will easy for us to choose a different noise example for each input_eg.
+    std::vector<std::string> list_noise_egs;
+    if (!perturb_opts.add_noise_rspecifier.empty()) {
+      list_noise_egs.clear();
+      SequentialNnetExampleReader noise_seq_reader(perturb_opts.add_noise_rspecifier);
+      for (; !noise_seq_reader.Done(); noise_seq_reader.Next()) {
+        std::string key = noise_seq_reader.Key();
+        list_noise_egs.push_back(key);
+      }
+      noise_seq_reader.Close();
+    }
+ 
     for (; !example_reader.Done(); example_reader.Next(), num_read++) {
       std::string key = example_reader.Key();
       const NnetExample &input_eg = example_reader.Value();
@@ -153,9 +154,22 @@ int main(int argc, char *argv[]) {
       Matrix<BaseFloat> perturb_eg_mat, 
         input_eg_mat;
       input_eg_io.features.CopyToMat(&input_eg_mat);      
+      
       // add
       if (!perturb_opts.add_noise_rspecifier.empty()) {
-        PerturbExample(perturb_opts, input_eg_mat, &perturb_eg_mat);
+        // For the input example, we firstly random choose an noise example.
+        int32 num_noise_egs = list_noise_egs.size();
+        int32 index_noise_eg = RandInt(0, num_noise_egs - 1);
+        std::string key_noise_eg = list_noise_egs[index_noise_eg];
+
+        RandomAccessNnetExampleReader noise_random_reader(perturb_opts.add_noise_rspecifier);
+        const NnetExample &noise_eg = noise_random_reader.Value(key_noise_eg);
+        const NnetIo &noise_eg_io = noise_eg.io[0];
+        Matrix<BaseFloat> noise_eg_mat;
+        noise_eg_io.features.CopyToMat(&noise_eg_mat);
+
+        // We call the PerturbExample to implement adding distortion.
+        PerturbExample(perturb_opts, input_eg_mat, noise_eg_mat, &perturb_eg_mat);
       } else {
         ApplyPerturbation(perturb_opts, input_eg_mat, noise_mat, &perturb_eg_mat);
       }

From 486314d332b97e9e35d131fd016ba4ee7e3999c0 Mon Sep 17 00:00:00 2001
From: Hang Lyu <hlyu@a11.clsp.jhu.edu>
Date: Tue, 6 Dec 2016 17:04:56 -0500
Subject: [PATCH 05/23] fix the add_noise from nnet3-examples to matrix

---
 src/feat/signal-distort.cc                    | 22 +++++++++++--
 src/feat/signal-distort.h                     |  5 ++-
 .../nnet3-xvector-signal-perturb-egs.cc       | 32 +++----------------
 3 files changed, 25 insertions(+), 34 deletions(-)

diff --git a/src/feat/signal-distort.cc b/src/feat/signal-distort.cc
index c00acb8c5f7..9cc1a19cb6b 100644
--- a/src/feat/signal-distort.cc
+++ b/src/feat/signal-distort.cc
@@ -128,7 +128,25 @@ void PerturbXvectorSignal::ApplyAdditiveNoise(const MatrixBase<BaseFloat> &input
 void PerturbXvectorSignal::ApplyDistortion(const MatrixBase<BaseFloat> &input_egs,
                                            Matrix<BaseFloat> *perturb_egs) {
     // conduct ApplyAdditiveNoise
-  if (!opts_.add_noise_rspecifier.empty()) {
+  if (!opts_.add_noise.empty()) {
+    // choose a noise from the noise.scp/ark
+    // 1) we need to record the keys of noise_egs
+    std::vector<std::string> list_noise_egs;
+    SequentialBaseFloatMatrixReader noise_seq_reader(opts_.add_noise);
+    for (; !noise_seq_reader.Done(); noise_seq_reader.Next()) {
+      std::string key = noise_seq_reader.Key();
+      list_noise_egs.push_back(key);
+    }
+    noise_seq_reader.Close();
+
+    // 2) we random choose an noise example
+    int32 num_noise_egs = list_noise_egs.size();
+    int32 index_noise_eg = RandInt(0, num_noise_egs - 1);
+    std::string key_noise_eg = list_noise_egs[index_noise_eg];
+    RandomAccessBaseFloatMatrixReader noise_random_reader(opts_.add_noise);
+    Matrix<BaseFloat> noise_eg_mat = noise_random_reader.Value(key_noise_eg);
+    SetNoiseEgs(noise_eg_mat);
+
     ApplyAdditiveNoise(input_egs, *noise_egs_, perturb_egs);
     // conduct others
     // TODO
@@ -141,11 +159,9 @@ void PerturbXvectorSignal::ApplyDistortion(const MatrixBase<BaseFloat> &input_eg
 // type of distortions on input.
 void PerturbExample(XvectorPerturbOptions opts,
                     const Matrix<BaseFloat> &input_egs,
-                    const Matrix<BaseFloat> &noise_egs,
                     Matrix<BaseFloat> *perturbed_egs) {
   //new a PerturbXvectorSignal object and call ApplyDistortion
   PerturbXvectorSignal perturb_egs(opts);
-  perturb_egs.SetNoiseEgs(noise_egs);
   perturb_egs.ApplyDistortion(input_egs, perturbed_egs);
 }
 
diff --git a/src/feat/signal-distort.h b/src/feat/signal-distort.h
index af06c235ca7..fb7729da9f4 100644
--- a/src/feat/signal-distort.h
+++ b/src/feat/signal-distort.h
@@ -43,7 +43,7 @@ struct XvectorPerturbOptions {
   int32 negation_prop; 
   bool rand_distort;
   std::string noise_egs;
-  std::string add_noise_rspecifier;
+  std::string add_noise;
   BaseFloat snr;
 
   XvectorPerturbOptions(): max_shift(0.2),
@@ -63,7 +63,7 @@ struct XvectorPerturbOptions {
     opts->Register("noise-egs", &noise_egs, "If supplied, the additive noise is added to input signal.");
     opts->Register("rand_distort", &rand_distort, "If true, the signal is slightly changes"
                    "using some designed FIR filter with no zeros.");
-    opts->Register("add-noise", &add_noise_rspecifier, "specify a file contains some noise egs");
+    opts->Register("add-noise", &add_noise, "specify a file contains some noise egs");
     opts->Register("SNR",&snr,"specify a Signal to Noise Ration. We will scale the noise according"
                 "to the original signal and SNR. Normally, it's a non-zero number between -30 and 30"
                 "default=10");
@@ -111,7 +111,6 @@ void TimeStretch(const MatrixBase<BaseFloat> &input_egs,
 
 void PerturbExample(XvectorPerturbOptions opts,
                     const Matrix<BaseFloat> &input_egs,
-                    const Matrix<BaseFloat> &noise_egs,
                     Matrix<BaseFloat> *perturbed_egs);
 
 } // end of namespace kaldi
diff --git a/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc b/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc
index 6ebfd8f9e4f..59c08c23002 100644
--- a/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc
+++ b/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc
@@ -102,6 +102,7 @@ int main(int argc, char *argv[]) {
         "e.g.\n"
         "nnet3-xvector-signal-perturb-egs --noise-egs=noise.egs\n"
         "--max-shift=0.2 --max-speed-perturb=0.1 --negation=true\n"
+        "--add-noise=noise.scp --snr=10\n"
         "ark:input.egs akr:distorted.egs\n";
     ParseOptions po(usage);
 
@@ -129,21 +130,7 @@ int main(int argc, char *argv[]) {
       SequentialNnetExampleReader noise_reader(perturb_opts.noise_egs);
       const NnetExample &noise_egs = noise_reader.Value();
       const NnetIo &noise_io = noise_egs.io[0];
-      noise_io.features.CopyToMat(noise_mat);
-       
-    }
-
-    // if we have the add_noise option, we need to record the keys of noise_egs.
-    // It will easy for us to choose a different noise example for each input_eg.
-    std::vector<std::string> list_noise_egs;
-    if (!perturb_opts.add_noise_rspecifier.empty()) {
-      list_noise_egs.clear();
-      SequentialNnetExampleReader noise_seq_reader(perturb_opts.add_noise_rspecifier);
-      for (; !noise_seq_reader.Done(); noise_seq_reader.Next()) {
-        std::string key = noise_seq_reader.Key();
-        list_noise_egs.push_back(key);
-      }
-      noise_seq_reader.Close();
+      noise_io.features.CopyToMat(noise_mat);       
     }
  
     for (; !example_reader.Done(); example_reader.Next(), num_read++) {
@@ -156,20 +143,9 @@ int main(int argc, char *argv[]) {
       input_eg_io.features.CopyToMat(&input_eg_mat);      
       
       // add
-      if (!perturb_opts.add_noise_rspecifier.empty()) {
-        // For the input example, we firstly random choose an noise example.
-        int32 num_noise_egs = list_noise_egs.size();
-        int32 index_noise_eg = RandInt(0, num_noise_egs - 1);
-        std::string key_noise_eg = list_noise_egs[index_noise_eg];
-
-        RandomAccessNnetExampleReader noise_random_reader(perturb_opts.add_noise_rspecifier);
-        const NnetExample &noise_eg = noise_random_reader.Value(key_noise_eg);
-        const NnetIo &noise_eg_io = noise_eg.io[0];
-        Matrix<BaseFloat> noise_eg_mat;
-        noise_eg_io.features.CopyToMat(&noise_eg_mat);
-
+      if (!perturb_opts.add_noise.empty()) {
         // We call the PerturbExample to implement adding distortion.
-        PerturbExample(perturb_opts, input_eg_mat, noise_eg_mat, &perturb_eg_mat);
+        PerturbExample(perturb_opts, input_eg_mat, &perturb_eg_mat);
       } else {
         ApplyPerturbation(perturb_opts, input_eg_mat, noise_mat, &perturb_eg_mat);
       }

From 12ade04d2fb80d7670757b215cc13727575ec2bc Mon Sep 17 00:00:00 2001
From: Hang Lyu <hlyu@a11.clsp.jhu.edu>
Date: Wed, 7 Dec 2016 17:55:58 -0500
Subject: [PATCH 06/23] remove private variable noise_egs_ from class and the
 corresponding change

---
 src/feat/signal-distort.cc | 47 ++++++++++++++++----------------------
 src/feat/signal-distort.h  | 22 ++++++------------
 2 files changed, 27 insertions(+), 42 deletions(-)

diff --git a/src/feat/signal-distort.cc b/src/feat/signal-distort.cc
index 9cc1a19cb6b..3803b904303 100644
--- a/src/feat/signal-distort.cc
+++ b/src/feat/signal-distort.cc
@@ -76,33 +76,30 @@ void TimeStretch(const MatrixBase<BaseFloat> &input_egs,
 // the signal level of the orginial signal. According to SNR, we rescale the noise
 // and add it. So that the perturbed signal is created. 
 void PerturbXvectorSignal::ApplyAdditiveNoise(const MatrixBase<BaseFloat> &input_eg,
-                                              const Matrix<BaseFloat> &noise_eg,
+                                              const Matrix<BaseFloat> &noise_mat,
                                               Matrix<BaseFloat> *perturbed_eg) {
   // In the version, we ask the noise_cols == input_cols.
   int32 input_rows = input_eg.NumRows(), input_cols = input_eg.NumCols();  
-  KALDI_ASSERT(noise_eg.NumCols() == input_cols);
+  KALDI_ASSERT(noise_mat.NumCols() == input_cols);
 
-  // According to the rows of noise_eg, form the noise_mat
-  // repeat the noise_eg blocks to have a new block which is longer than input_eg
- 
-  // As the noise_eg is very huge and the input_eg is small normally,
-  // so we'd better not reload the "noise_eg" matrix
+  // As the noise_mat is very huge and the input_eg is small normally,
+  // so we'd better not reload the "noise_mat" matrix
   // select the noise range
 
   Matrix<BaseFloat> selected_noise_mat;
   selected_noise_mat.Resize(input_rows, input_cols);
   
-  int32 noise_rows = noise_eg.NumRows();
+  int32 noise_rows = noise_mat.NumRows();
   int32 start_row_ind = RandInt(0, noise_rows - input_rows);
   
-  if (noise_eg.NumRows() < input_rows) {
+  if (noise_mat.NumRows() < input_rows) {
     int32 indices[input_rows];
     for (int32 i=0; i < input_rows; ++i) {
-      indices[i] = (start_row_ind + i) % noise_eg.NumRows();
+      indices[i] = (start_row_ind + i) % noise_mat.NumRows();
     }
-    selected_noise_mat.CopyRows(noise_eg, indices);
+    selected_noise_mat.CopyRows(noise_mat, indices);
   } else {
-    selected_noise_mat.AddMat(1.0, noise_eg.Range(start_row_ind, input_rows,
+    selected_noise_mat.AddMat(1.0, noise_mat.Range(start_row_ind, input_rows,
                                                   0, input_cols));
   }
 
@@ -127,40 +124,36 @@ void PerturbXvectorSignal::ApplyAdditiveNoise(const MatrixBase<BaseFloat> &input
 
 void PerturbXvectorSignal::ApplyDistortion(const MatrixBase<BaseFloat> &input_egs,
                                            Matrix<BaseFloat> *perturb_egs) {
-    // conduct ApplyAdditiveNoise
   if (!opts_.add_noise.empty()) {
     // choose a noise from the noise.scp/ark
     // 1) we need to record the keys of noise_egs
-    std::vector<std::string> list_noise_egs;
+    std::vector<std::string> noise_list;
     SequentialBaseFloatMatrixReader noise_seq_reader(opts_.add_noise);
     for (; !noise_seq_reader.Done(); noise_seq_reader.Next()) {
       std::string key = noise_seq_reader.Key();
-      list_noise_egs.push_back(key);
+      noise_list.push_back(key);
     }
     noise_seq_reader.Close();
 
     // 2) we random choose an noise example
-    int32 num_noise_egs = list_noise_egs.size();
-    int32 index_noise_eg = RandInt(0, num_noise_egs - 1);
-    std::string key_noise_eg = list_noise_egs[index_noise_eg];
+    int32 num_noises = noise_list.size();
+    int32 noise_index = RandInt(0, num_noises - 1);
+    std::string noise_name = noise_list[noise_index];
     RandomAccessBaseFloatMatrixReader noise_random_reader(opts_.add_noise);
-    Matrix<BaseFloat> noise_eg_mat = noise_random_reader.Value(key_noise_eg);
-    SetNoiseEgs(noise_eg_mat);
+    Matrix<BaseFloat> noise_mat = noise_random_reader.Value(noise_name);
 
-    ApplyAdditiveNoise(input_egs, *noise_egs_, perturb_egs);
+    // 3) conduct ApplyAdditiveNoise
+    ApplyAdditiveNoise(input_egs, noise_mat, perturb_egs);
     // conduct others
     // TODO
-  } else { // deal with the opts_.noise_egs situation
-    // TODO
-  }
+  } 
 }
 
-// This function is a entrance. It calls ApplyDistortion to apply different
-// type of distortions on input.
+// This function calls ApplyDistortion to apply different type of perturbations.
 void PerturbExample(XvectorPerturbOptions opts,
                     const Matrix<BaseFloat> &input_egs,
                     Matrix<BaseFloat> *perturbed_egs) {
-  //new a PerturbXvectorSignal object and call ApplyDistortion
+  // new a PerturbXvectorSignal object and call ApplyDistortion
   PerturbXvectorSignal perturb_egs(opts);
   perturb_egs.ApplyDistortion(input_egs, perturbed_egs);
 }
diff --git a/src/feat/signal-distort.h b/src/feat/signal-distort.h
index fb7729da9f4..8e45c066cd3 100644
--- a/src/feat/signal-distort.h
+++ b/src/feat/signal-distort.h
@@ -51,7 +51,7 @@ struct XvectorPerturbOptions {
                            frame_dim(80),
                            negation_prop(0.0),
                            rand_distort(false),
-                           snr(10) { }
+                           snr(10.0) { }
   void Register(OptionsItf *opts) { 
     opts->Register("max-shift", &max_shift, "Maximum random shift relative"
                 "to frame length applied to egs.");
@@ -63,31 +63,23 @@ struct XvectorPerturbOptions {
     opts->Register("noise-egs", &noise_egs, "If supplied, the additive noise is added to input signal.");
     opts->Register("rand_distort", &rand_distort, "If true, the signal is slightly changes"
                    "using some designed FIR filter with no zeros.");
-    opts->Register("add-noise", &add_noise, "specify a file contains some noise egs");
-    opts->Register("SNR",&snr,"specify a Signal to Noise Ration. We will scale the noise according"
-                "to the original signal and SNR. Normally, it's a non-zero number between -30 and 30"
-                "default=10");
+    opts->Register("add-noise", &add_noise, "Noise rspecifier for additive noises, if "
+                   "nonempty, the additive noise randomly selected and added to input egs.");
+    opts->Register("SNR",&snr,"Specify a Signal to Noise Ration. We will scale the noise according "
+                   "to the original signal and SNR. Normally, it's a non-zero number between -30 and 30"
+                   "default=10");
   }
 };
 
 class PerturbXvectorSignal {
  public:
   PerturbXvectorSignal(XvectorPerturbOptions opts): opts_(opts) { };
-  inline void SetNoiseEgs(const Matrix<BaseFloat> &noise_egs) {
-    noise_egs_ = &noise_egs;
-  }
   void ApplyDistortion(const MatrixBase<BaseFloat> &input_egs,
                        Matrix<BaseFloat> *perturb_egs);
  private:
   XvectorPerturbOptions opts_;
-  // if we want use many examples in once ApplyDistortion, we can expand the point
-  // to a point vector.
-  const Matrix<BaseFloat> *noise_egs_;
-  // I know we can use noise_egs_ instead of noise_eg parameter in this function,
-  // But I keep it. Because we may expand the point to a point vector and choose
-  // one kind noise to call ApplyAdditiveNoise.
   void ApplyAdditiveNoise(const MatrixBase<BaseFloat> &input_eg,
-                          const Matrix<BaseFloat> &noise_eg,
+                          const Matrix<BaseFloat> &noise_mat,
                           Matrix<BaseFloat> *perturbed_eg);
 };
 

From 9bec26d6f932cac52e79cb4cd417c046e53c16b4 Mon Sep 17 00:00:00 2001
From: Hang Lyu <hlyu@a11.clsp.jhu.edu>
Date: Wed, 7 Dec 2016 23:04:20 -0500
Subject: [PATCH 07/23] modify the main program

---
 src/feat/signal-distort.h                     |  6 +-
 .../nnet3-xvector-signal-perturb-egs.cc       | 90 ++-----------------
 2 files changed, 7 insertions(+), 89 deletions(-)

diff --git a/src/feat/signal-distort.h b/src/feat/signal-distort.h
index 8e45c066cd3..ccbaa3241cc 100644
--- a/src/feat/signal-distort.h
+++ b/src/feat/signal-distort.h
@@ -42,7 +42,6 @@ struct XvectorPerturbOptions {
   int32 frame_dim;
   int32 negation_prop; 
   bool rand_distort;
-  std::string noise_egs;
   std::string add_noise;
   BaseFloat snr;
 
@@ -53,14 +52,13 @@ struct XvectorPerturbOptions {
                            rand_distort(false),
                            snr(10.0) { }
   void Register(OptionsItf *opts) { 
-    opts->Register("max-shift", &max_shift, "Maximum random shift relative"
-                "to frame length applied to egs.");
+    opts->Register("max-shift", &max_shift, "Maximum random shift relative "
+                   "to frame length applied to egs.");
     opts->Register("max-speed-perturb", &max_time_stretch,
                    "Max speed perturbation applied on egs.");
     opts->Register("frame-dim", &frame_dim,
                    "The numebr of samples in input frame as product of frame_length by samp_freq.");
     opts->Register("negation-prop", &negation_prop, "This proportion of the input value is randomly negated.");
-    opts->Register("noise-egs", &noise_egs, "If supplied, the additive noise is added to input signal.");
     opts->Register("rand_distort", &rand_distort, "If true, the signal is slightly changes"
                    "using some designed FIR filter with no zeros.");
     opts->Register("add-noise", &add_noise, "Noise rspecifier for additive noises, if "
diff --git a/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc b/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc
index 59c08c23002..351277482a8 100644
--- a/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc
+++ b/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc
@@ -22,70 +22,6 @@
 #include "feat/signal-distort.h"
 #include "nnet3/nnet-example.h"
 #include "nnet3/nnet-example-utils.h" 
-namespace kaldi {
-namespace nnet3 {
-
-// This function applies different type of perturbation to input_egs.
-// random distortion of inputs, random shifts, adding additive noise,
-// random time stretch and random negations are different type of 
-// distortions used in this function.
-void ApplyPerturbation(XvectorPerturbOptions opts,
-                       const Matrix<BaseFloat> &input_egs,
-                       Matrix<BaseFloat> *noise_egs,
-                       Matrix<BaseFloat> *perturb_egs) {
-
-  PerturbXvectorSignal perturb_xvector(opts);
-  
-  Matrix<BaseFloat> shifted_egs(input_egs);
-  // Generate random shift samples to shift egs. 
-  if (opts.max_shift != 0.0) {
-    int32 max_shift_int = static_cast<int32>(opts.max_shift * opts.frame_dim);
-    // shift input_egs using random shift. 
-    int32 eg_dim = input_egs.NumCols() - opts.frame_dim,
-      shift = RandInt(0, max_shift_int);
-    shifted_egs.CopyFromMat(input_egs.Range(0, input_egs.NumRows(), shift, eg_dim));
-  }
-  
-  Matrix<BaseFloat> rand_distort_shifted_egs(shifted_egs);
-  if (opts.rand_distort) {
-    // randomly generate an zero-phase FIR filter with no zeros.
-    // In future, we can select trucated part of room impluse response
-    // and convolve it with input_egs.
-    ////perturb_xvector.ComputeAndApplyRandDistortion(shifted_egs,
-    ////                              &rand_distort_shifted_egs);
-  }
-
-  if (noise_egs) { 
-    // select random block of noise egs and add to input_egs
-    // number of additive noises should be larger than number of input-egs.
-    KALDI_ASSERT(noise_egs->NumRows() >= input_egs.NumRows());
-    if (noise_egs->NumRows() < input_egs.NumRows()) {
-      // repeat the noise_egs_mat blocks to have same length block
-      // and randomly perturb the rows.
-    } else {
-      // Select random submatrix out of noise_egs and add it to perturb_egs.
-      // we should shuffle noise_egs before passing them to this binary.
-      int32 start_row_ind = RandInt(0, noise_egs->NumRows() - input_egs.NumRows()),
-        start_col_ind = RandInt(0, noise_egs->NumCols() - input_egs.NumCols()); 
-      rand_distort_shifted_egs.AddMat(1.0, noise_egs->Range(start_row_ind, input_egs.NumRows(),
-                                      start_col_ind, input_egs.NumCols()));
-    }
-  }
-  // Perturb speed of signal egs
-  Matrix<BaseFloat> warped_distorted_shifted_egs(rand_distort_shifted_egs);
-  ////if (opts.max_time_stretch != 0.0) 
-  ////  perturb_xvector.TimeStretch(rand_distort_shifted_egs, 
-  ////                              &warped_distorted_shifted_egs);
-   
-  // If nagation is true, the sample values are randomly negated
-  // with some probability.
-  ////if (opts.negation) {
-   
-  ////}
-}
-
-} // end of namespace nnet3
-} // end of namespace kaldi
 
 int main(int argc, char *argv[]) {
   try {
@@ -100,12 +36,11 @@ int main(int argc, char *argv[]) {
         "such as additive noise, negation, random time shifts or random distortion.\n"
         "Usage: nnet3-xvector-signal-perturb-egs [options...] <egs-especifier> <egs-wspecifier>\n"
         "e.g.\n"
-        "nnet3-xvector-signal-perturb-egs --noise-egs=noise.egs\n"
-        "--max-shift=0.2 --max-speed-perturb=0.1 --negation=true\n"
-        "--add-noise=noise.scp --snr=10\n"
+        "nnet3-xvector-signal-perturb-egs --max-shift=0.2"
+        " --max-speed-perturb=0.1 --negation=true --add-noise=noise.scp --snr=10\n"
         "ark:input.egs akr:distorted.egs\n";
-    ParseOptions po(usage);
 
+    ParseOptions po(usage);
     XvectorPerturbOptions perturb_opts;
     perturb_opts.Register(&po);
 
@@ -124,15 +59,6 @@ int main(int argc, char *argv[]) {
 
     int64 num_read = 0, num_written = 0;
 
-    Matrix<BaseFloat> *noise_mat = NULL;
-    // read additive noise egs if it is specified.
-    if (!perturb_opts.noise_egs.empty()) {
-      SequentialNnetExampleReader noise_reader(perturb_opts.noise_egs);
-      const NnetExample &noise_egs = noise_reader.Value();
-      const NnetIo &noise_io = noise_egs.io[0];
-      noise_io.features.CopyToMat(noise_mat);       
-    }
- 
     for (; !example_reader.Done(); example_reader.Next(), num_read++) {
       std::string key = example_reader.Key();
       const NnetExample &input_eg = example_reader.Value();
@@ -142,14 +68,8 @@ int main(int argc, char *argv[]) {
         input_eg_mat;
       input_eg_io.features.CopyToMat(&input_eg_mat);      
       
-      // add
-      if (!perturb_opts.add_noise.empty()) {
-        // We call the PerturbExample to implement adding distortion.
-        PerturbExample(perturb_opts, input_eg_mat, &perturb_eg_mat);
-      } else {
-        ApplyPerturbation(perturb_opts, input_eg_mat, noise_mat, &perturb_eg_mat);
-      }
-      // add-end
+      PerturbExample(perturb_opts, input_eg_mat, &perturb_eg_mat);
+ 
       perturb_eg->io.resize(1.0);
       perturb_eg->io[0].features.SwapFullMatrix(&perturb_eg_mat);
       example_writer.Write(key, *perturb_eg);

From fb78d055adef7e7bb1553a1f117907caabf93ba8 Mon Sep 17 00:00:00 2001
From: Hang Lyu <hlyu@a11.clsp.jhu.edu>
Date: Thu, 8 Dec 2016 18:08:52 -0500
Subject: [PATCH 08/23] fix the signal distortion

---
 src/feat/signal-distort.cc                    | 59 +++++++++----------
 src/feat/signal-distort.h                     | 17 +++---
 .../nnet3-xvector-signal-perturb-egs.cc       |  3 +-
 3 files changed, 39 insertions(+), 40 deletions(-)

diff --git a/src/feat/signal-distort.cc b/src/feat/signal-distort.cc
index 3803b904303..c71f8e967db 100644
--- a/src/feat/signal-distort.cc
+++ b/src/feat/signal-distort.cc
@@ -72,6 +72,17 @@ void TimeStretch(const MatrixBase<BaseFloat> &input_egs,
   perturb_egs->CopyFromMat(out_mat);
 }
 
+PerturbXvectorSignal::PerturbXvectorSignal(XvectorPerturbOptions opts): opts_(opts) {
+  if (!opts_.add_noise.empty()) {
+    // initialize the noise_list_
+    SequentialBaseFloatMatrixReader noise_seq_reader(opts_.add_noise);
+    for (; !noise_seq_reader.Done(); noise_seq_reader.Next()) {
+      std::string key = noise_seq_reader.Key();
+      noise_list_.push_back(key);
+    }
+    noise_seq_reader.Close();
+  }
+}
 // This function add the noise to the orginial signal. We should not normalize 
 // the signal level of the orginial signal. According to SNR, we rescale the noise
 // and add it. So that the perturbed signal is created. 
@@ -122,40 +133,26 @@ void PerturbXvectorSignal::ApplyAdditiveNoise(const MatrixBase<BaseFloat> &input
   perturbed_eg->AddMat(scale, selected_noise_mat);
 }
 
-void PerturbXvectorSignal::ApplyDistortion(const MatrixBase<BaseFloat> &input_egs,
-                                           Matrix<BaseFloat> *perturb_egs) {
-  if (!opts_.add_noise.empty()) {
-    // choose a noise from the noise.scp/ark
-    // 1) we need to record the keys of noise_egs
-    std::vector<std::string> noise_list;
-    SequentialBaseFloatMatrixReader noise_seq_reader(opts_.add_noise);
-    for (; !noise_seq_reader.Done(); noise_seq_reader.Next()) {
-      std::string key = noise_seq_reader.Key();
-      noise_list.push_back(key);
-    }
-    noise_seq_reader.Close();
-
-    // 2) we random choose an noise example
-    int32 num_noises = noise_list.size();
-    int32 noise_index = RandInt(0, num_noises - 1);
-    std::string noise_name = noise_list[noise_index];
-    RandomAccessBaseFloatMatrixReader noise_random_reader(opts_.add_noise);
-    Matrix<BaseFloat> noise_mat = noise_random_reader.Value(noise_name);
-
-    // 3) conduct ApplyAdditiveNoise
-    ApplyAdditiveNoise(input_egs, noise_mat, perturb_egs);
-    // conduct others
-    // TODO
-  } 
+void PerturbXvectorSignal::ApplyDistortion(const MatrixBase<BaseFloat> &input_eg,
+                                           Matrix<BaseFloat> *perturbed_eg) {
+  // we random choose an noise example
+  int32 num_noises = noise_list_.size();
+  int32 noise_index = RandInt(0, num_noises - 1);
+  std::string noise_name = noise_list_[noise_index];
+  RandomAccessBaseFloatMatrixReader noise_random_reader(opts_.add_noise);
+  Matrix<BaseFloat> noise_mat = noise_random_reader.Value(noise_name);
+
+  // conduct ApplyAdditiveNoise
+  ApplyAdditiveNoise(input_eg, noise_mat, perturbed_eg);
+  // conduct others
+  // TODO
 }
 
 // This function calls ApplyDistortion to apply different type of perturbations.
-void PerturbExample(XvectorPerturbOptions opts,
-                    const Matrix<BaseFloat> &input_egs,
-                    Matrix<BaseFloat> *perturbed_egs) {
-  // new a PerturbXvectorSignal object and call ApplyDistortion
-  PerturbXvectorSignal perturb_egs(opts);
-  perturb_egs.ApplyDistortion(input_egs, perturbed_egs);
+void PerturbExample(PerturbXvectorSignal &eg_perturber,
+                    const Matrix<BaseFloat> &input_eg,
+                    Matrix<BaseFloat> *perturbed_eg) {
+  eg_perturber.ApplyDistortion(input_eg, perturbed_eg);
 }
 
 } // end of namespace kaldi
diff --git a/src/feat/signal-distort.h b/src/feat/signal-distort.h
index ccbaa3241cc..b8fc1542b4d 100644
--- a/src/feat/signal-distort.h
+++ b/src/feat/signal-distort.h
@@ -71,14 +71,15 @@ struct XvectorPerturbOptions {
 
 class PerturbXvectorSignal {
  public:
-  PerturbXvectorSignal(XvectorPerturbOptions opts): opts_(opts) { };
-  void ApplyDistortion(const MatrixBase<BaseFloat> &input_egs,
-                       Matrix<BaseFloat> *perturb_egs);
- private:
-  XvectorPerturbOptions opts_;
+  PerturbXvectorSignal(XvectorPerturbOptions opts);
+  void ApplyDistortion(const MatrixBase<BaseFloat> &input_eg,
+                       Matrix<BaseFloat> *perturbed_eg);
   void ApplyAdditiveNoise(const MatrixBase<BaseFloat> &input_eg,
                           const Matrix<BaseFloat> &noise_mat,
                           Matrix<BaseFloat> *perturbed_eg);
+ private:
+  XvectorPerturbOptions opts_;
+  std::vector<std::string> noise_list_;
 };
 
 
@@ -99,9 +100,9 @@ void TimeStretch(const MatrixBase<BaseFloat> &input_egs,
                  BaseFloat max_time_stretch,
                  Matrix<BaseFloat> *perturb_egs);
 
-void PerturbExample(XvectorPerturbOptions opts,
-                    const Matrix<BaseFloat> &input_egs,
-                    Matrix<BaseFloat> *perturbed_egs);
+void PerturbExample(PerturbXvectorSignal &eg_perturber,
+                    const Matrix<BaseFloat> &input_eg,
+                    Matrix<BaseFloat> *perturbed_eg);
 
 } // end of namespace kaldi
 #endif // KALDI_SIGNAL_DISTORT_H_
diff --git a/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc b/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc
index 351277482a8..d32c9a66d60 100644
--- a/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc
+++ b/src/xvectorbin/nnet3-xvector-signal-perturb-egs.cc
@@ -59,6 +59,7 @@ int main(int argc, char *argv[]) {
 
     int64 num_read = 0, num_written = 0;
 
+    PerturbXvectorSignal eg_perturber(perturb_opts);
     for (; !example_reader.Done(); example_reader.Next(), num_read++) {
       std::string key = example_reader.Key();
       const NnetExample &input_eg = example_reader.Value();
@@ -68,7 +69,7 @@ int main(int argc, char *argv[]) {
         input_eg_mat;
       input_eg_io.features.CopyToMat(&input_eg_mat);      
       
-      PerturbExample(perturb_opts, input_eg_mat, &perturb_eg_mat);
+      PerturbExample(eg_perturber, input_eg_mat, &perturb_eg_mat);
  
       perturb_eg->io.resize(1.0);
       perturb_eg->io[0].features.SwapFullMatrix(&perturb_eg_mat);

From 48d51aae47d41e57f3bad8bb146fe018d5a00e9c Mon Sep 17 00:00:00 2001
From: Hang Lyu <hlyu@a11.clsp.jhu.edu>
Date: Tue, 13 Dec 2016 20:24:10 -0500
Subject: [PATCH 09/23] write nnet3-fvector-get-egs.cc

---
 src/fvectorbin/Makefile                 |  25 ++++
 src/fvectorbin/nnet3-fvector-get-egs.cc | 155 ++++++++++++++++++++++++
 2 files changed, 180 insertions(+)
 create mode 100644 src/fvectorbin/Makefile
 create mode 100644 src/fvectorbin/nnet3-fvector-get-egs.cc

diff --git a/src/fvectorbin/Makefile b/src/fvectorbin/Makefile
new file mode 100644
index 00000000000..7d826881cf6
--- /dev/null
+++ b/src/fvectorbin/Makefile
@@ -0,0 +1,25 @@
+
+all:
+EXTRA_CXXFLAGS = -Wno-sign-compare
+include ../kaldi.mk
+
+LDFLAGS += $(CUDA_LDFLAGS)
+LDLIBS += $(CUDA_LDLIBS)
+
+BINFILES = nnet3-fvector-get-egs
+
+OBJFILES =
+
+# Add this dependency to force cuda-compiled.o to be rebuilt when we reconfigure.
+cuda-compiled.o: ../kaldi.mk
+
+TESTFILES =
+
+ADDLIBS = ../xvector/kaldi-xvector.a ../nnet3/kaldi-nnet3.a ../gmm/kaldi-gmm.a \
+         ../decoder/kaldi-decoder.a ../lat/kaldi-lat.a ../hmm/kaldi-hmm.a  \
+         ../transform/kaldi-transform.a ../tree/kaldi-tree.a  \
+         ../thread/kaldi-thread.a ../feat/kaldi-feat.a ../cudamatrix/kaldi-cudamatrix.a \
+         ../matrix/kaldi-matrix.a ../fstext/kaldi-fstext.a \
+         ../util/kaldi-util.a ../base/kaldi-base.a
+
+include ../makefiles/default_rules.mk
diff --git a/src/fvectorbin/nnet3-fvector-get-egs.cc b/src/fvectorbin/nnet3-fvector-get-egs.cc
new file mode 100644
index 00000000000..2f7fdbfa748
--- /dev/null
+++ b/src/fvectorbin/nnet3-fvector-get-egs.cc
@@ -0,0 +1,155 @@
+// fvectorbin/nnet3-fvector-get-egs.cc
+
+// Copyright 2016  Johns Hopkins University (author:  Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sstream>
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "hmm/transition-model.h"
+#include "hmm/posterior.h"
+#include "nnet3/nnet-example.h"
+
+namespace kaldi {
+namespace nnet3 {
+
+
+// Splits the features of one utterance into fixed-size examples and writes
+// them to 'example_writer' under keys of the form <utt_id>-<first-frame>.
+// Each example has left_context + frames_per_eg + right_context rows; rows
+// outside the utterance are filled by repeating its first or last frame.
+static void ProcessFile(const MatrixBase<BaseFloat> &feats,
+                        const std::string &utt_id,
+                        bool compress,
+                        int32 left_context,
+                        int32 right_context,
+                        int32 frames_per_eg,
+                        int64 *num_frames_written,
+                        int64 *num_egs_written,
+                        NnetExampleWriter *example_writer) {
+  // A non-positive chunk size would make the loop below never advance.
+  KALDI_ASSERT(frames_per_eg > 0);
+  const int32 tot_frames = left_context + frames_per_eg + right_context;
+
+  for (int32 t = 0; t < feats.NumRows(); t += frames_per_eg) {
+    // actual_frames_per_eg is the number of frames in the center that really
+    // come from the utterance.  At the end of the file the last frame is
+    // repeated so that all examples have the same structure (prevents the
+    // need for recompilations).
+    int32 actual_frames_per_eg = std::min(frames_per_eg,
+                                          feats.NumRows() - t);
+
+    Matrix<BaseFloat> input_frames(tot_frames, feats.NumCols(), kUndefined);
+
+    // Set up "input_frames", clamping frame indexes to the utterance.
+    for (int32 j = -left_context; j < frames_per_eg + right_context; j++) {
+      int32 t2 = j + t;
+      if (t2 < 0) t2 = 0;
+      if (t2 >= feats.NumRows()) t2 = feats.NumRows() - 1;
+      SubVector<BaseFloat> src(feats, t2),
+                           dest(input_frames, j + left_context);
+      dest.CopyFromVec(src);
+    }
+
+    NnetExample eg;
+    // call the regular input "input".
+    eg.io.push_back(NnetIo("input", -left_context, input_frames));
+    if (compress) { eg.Compress(); }
+
+    std::ostringstream os;
+    os << utt_id << "-" << t;
+    std::string key = os.str();  // key is <utt_id>-<frame_id>
+    *num_frames_written += actual_frames_per_eg;
+    *num_egs_written += 1;
+    example_writer->Write(key, eg);
+  }
+}
+
+
+} // namespace nnet3
+} // namespace kaldi
+
+// Reads feature matrices and writes them out as nnet3 examples (see usage).
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace kaldi::nnet3;
+    typedef kaldi::int32 int32;
+    typedef kaldi::int64 int64;
+    const char *usage =
+        "Get frame-by-frame examples of data for nnet3 neural network training.\n"
+        "Essentially this is a format change from features into a special frame-by-frame format.\n"
+        "This program handles the common case where you have some input features\n"
+        "and convert them to fvector examples format\n"
+        "Note: In fvector version, there is no need for iVectors, posterior and labels.\n"
+        "\n"
+        "Usage:  nnet3-fvector-get-egs [options] <features-rspecifier> <egs-out>\n"
+        "\n"
+        "An example [where $feats expands to the actual features]:\n"
+        "nnet3-fvector-get-egs --left-context=12 --right-context=9 --compress=true \"$feats\" \\\n"
+        "\"ark:train.egs\"\n";
+
+    bool compress = true;
+    int32 left_context = 0, right_context = 0, num_frames = 1;
+    ParseOptions po(usage);
+    po.Register("compress", &compress, "If true, write egs in "
+                "compressed format.");
+    po.Register("left-context", &left_context, "Number of frames of left "
+                "context the neural net requires.");
+    po.Register("right-context", &right_context, "Number of frames of right "
+                "context the neural net requires.");
+    po.Register("num-frames", &num_frames, "Number of frames in the center "
+                "of each example; must be positive.");
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 2) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string feature_rspecifier = po.GetArg(1),
+                examples_wspecifier = po.GetArg(2);
+    // ProcessFile() advances num_frames rows per example; <= 0 never ends.
+    if (num_frames <= 0)
+      KALDI_ERR << "--num-frames must be positive, got " << num_frames;
+
+    // Read in all the training files.
+    SequentialBaseFloatMatrixReader feat_reader(feature_rspecifier);
+    NnetExampleWriter example_writer(examples_wspecifier);
+    int32 num_done = 0;
+    int64 num_frames_written = 0, num_egs_written = 0;
+
+    for (; !feat_reader.Done(); feat_reader.Next()) {
+      std::string key = feat_reader.Key();
+      const Matrix<BaseFloat> &feats = feat_reader.Value();
+      ProcessFile(feats, key, compress, left_context, right_context,
+                  num_frames, &num_frames_written, &num_egs_written,
+                  &example_writer);
+      num_done++;
+    }
+
+    KALDI_LOG << "Finished generating examples, "
+              << "successfully processed " << num_done
+              << " feature files, wrote " << num_egs_written << " examples"
+              << " with " << num_frames_written << " frames in total.";
+    return (num_egs_written == 0 || num_done == 0 ? 1 : 0);
+  } catch(const std::exception &e) {
+    std::cerr << e.what() << '\n';
+    return -1;
+  }
+}

From 25f3e1b443a2f70800c788e0869ca4ff4287c461 Mon Sep 17 00:00:00 2001
From: Hang Lyu <hlyu@a11.clsp.jhu.edu>
Date: Sun, 18 Dec 2016 23:26:36 -0500
Subject: [PATCH 10/23] about generate noise range

---
 .../nnet3/fvector/generate_noise_range.py     | 127 ++++++++++++++++++
 .../s5/steps/nnet3/fvector/lh_add_noise.sh    |  41 ++++++
 2 files changed, 168 insertions(+)
 create mode 100644 egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py
 create mode 100644 egs/wsj/s5/steps/nnet3/fvector/lh_add_noise.sh

diff --git a/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py b/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py
new file mode 100644
index 00000000000..db4d35a4325
--- /dev/null
+++ b/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+import re, os, argparse, sys, math, warnings, random
+
+parser = argparse.ArgumentParser(description="Generate n kinds of noise range for each original wav",
+                                 epilog="Called by steps/nnet3/fvector/lh_add_noise.sh")
+parser.add_argument("--num-kind-range", type=int, default=4,
+                    help="the number of kinds of noise ranges")
+parser.add_argument("--min-additive-noise-len", type=float, default=2.0,
+                    help="the minimum duration of each noise file")
+parser.add_argument("--min-snr", type=int, default=0,
+                    help="the minimum Signal-to-Noise Rate, the default=0")
+parser.add_argument("--max-snr", type=int, default=-10,
+                    help="the maximum Signal-to-Noise Rate, the default=-10")
+parser.add_argument("--seed", type=int, default=-1,
+                    help="Seed for random number generator")
+
+# now the positional arguments
+parser.add_argument("wav_utt2dur",
+                    help="utt2dur file of the original wav to be used as input (format is: "
+                    "<utterance-id> <duration>")
+parser.add_argument("noise_utt2dur",
+                    help="utt2dur file of the noise wav to be used as input (format is: "
+                    "<utterance-id> <duration>")
+parser.add_argument("rangs_dir",
+                    help="Name of ranges directory, exp/fxvector/ranges")
+
+print(' '.join(sys.argv))
+
+args = parser.parse_args()
+
+## Check arguments
+if args.min_snr < args.max_snr:
+    sys.exit("For SNR, the less numerical value is, the larger noise is. So --min-snr bigger "
+             "than --max-snr in numerical value.")
+
+random.seed(args.seed)
+
+# deal with the original wav utt2dur (open() raises on failure, it never returns None)
+try:
+    f = open(args.wav_utt2dur, "r")
+except IOError:
+    sys.exit("Error opening wav_utt2dur file " + str(args.wav_utt2dur))
+wav_utt_ids = []
+wav_lengths = []
+for line in f:
+    a = line.split()
+    if len(a) != 2:
+        sys.exit("bad line in wav_utt2dur file " + line)
+    wav_utt_ids.append(a[0])
+    wav_lengths.append(float(a[1]))  # durations are used numerically below
+f.close()
+wav_num_utts = len(wav_utt_ids)
+
+# deal with the noise wav utt2dur
+try:
+    f = open(args.noise_utt2dur, "r")
+except IOError:
+    sys.exit("Error opening noise_utt2dur file " + str(args.noise_utt2dur))
+noise_utt_ids = []
+noise_lengths = []
+for line in f:
+    a = line.split()
+    if len(a) != 2:
+        sys.exit("bad line in noise_utt2dur file " + line)
+    if float(a[1]) < args.min_additive_noise_len:
+        sys.exit("noise shorter than --min-additive-noise-len in noise_utt2dur file " + line)
+    noise_utt_ids.append(a[0])
+    noise_lengths.append(float(a[1]))
+f.close()
+noise_num_utts = len(noise_utt_ids)
+if noise_num_utts == 0:
+    sys.exit("no usable noise in noise_utt2dur file " + str(args.noise_utt2dur))
+
+# generate the range file for each original wav file
+for i in range(0, wav_num_utts):
+    # decide the number of noises which will be added to this wav; always use
+    # at least one, even when the wav is shorter than --min-additive-noise-len
+    current_wav_len = wav_lengths[i]
+    max_num_additive_noise = int(current_wav_len / args.min_additive_noise_len)
+    upperbound_num_additive_noise = max(1, min(max_num_additive_noise, noise_num_utts))
+
+    # select a number from [1 ... upperbound_num_additive_noise]
+    num_additive_noise = random.randint(1, upperbound_num_additive_noise)
+
+    # decide the length of each noise, minus 0.01 to prevent overstep
+    len_additive_noise = float('{:.2f}'.format(current_wav_len / num_additive_noise)) - 0.01
+
+    # We generate $num_kind_range ranges
+    for j in range(0, args.num_kind_range):
+        # create a file to record the ranges
+        range_file = args.rangs_dir + "/" + str(wav_utt_ids[i]) + ".range." + str(j)
+        try:
+            f = open(range_file, "w")
+        except IOError:
+            sys.exit("Error open file " + range_file)
+
+        # generate range file
+        # format: wav_t_start, wav_t_end, noise_name, noise_t_start, noise_t_end, snr
+        for k in range(0, num_additive_noise):
+            wav_t_start = float('{:.2f}'.format(k * len_additive_noise))
+
+            noise_index = random.randrange(0, noise_num_utts)
+            current_noise_name = noise_utt_ids[noise_index]
+            current_noise_len = noise_lengths[noise_index]
+
+            # randrange() only accepts integers, so draw the float values with uniform()
+            upperbound_add_len = min(len_additive_noise, current_noise_len)
+            current_add_len = float('{:.2f}'.format(random.uniform(0, upperbound_add_len)))
+
+            noise_start_bound = float('{:.2f}'.format(current_noise_len - current_add_len))
+            noise_t_start = float('{:.2f}'.format(random.uniform(0, noise_start_bound)))
+            noise_t_end = noise_t_start + current_add_len
+            wav_t_end = wav_t_start + current_add_len
+            # randint() includes both ends, so --min-snr == --max-snr also works
+            current_snr = random.randint(args.max_snr, args.min_snr)
+            print("{0} {1} {2} {3} {4} {5}".format(wav_t_start, wav_t_end,
+                                                   current_noise_name,
+                                                   noise_t_start, noise_t_end,
+                                                   current_snr),
+                  file=f)
+        f.close()
+
+print("generate_noise_range.py: finished generate the range files for all wav")
+
+
diff --git a/egs/wsj/s5/steps/nnet3/fvector/lh_add_noise.sh b/egs/wsj/s5/steps/nnet3/fvector/lh_add_noise.sh
new file mode 100644
index 00000000000..6b81af6c6ec
--- /dev/null
+++ b/egs/wsj/s5/steps/nnet3/fvector/lh_add_noise.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Begin Configuration section
+cmd=run.pl               # command used to launch the job and write its log
+stage=0
+min_additive_noise_len=2 # the minimum duration of each noise file
+num_kind_range=4         # the number of kinds of noise ranges
+min_snr=0                # the minimum snr value
+max_snr=0                # the maximum snr value
+seed=-1                  # set the random seed
+# End Configuration section
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. utils/parse_options.sh || exit 1;
+
+data=$1  # contain wav.scp
+noise=$2 # contain noise.scp
+dir=$3   # eg: ranges/
+
+if [ ! -f $data/utt2dur ]; then
+  # get original clean wav's duration
+  utils/data/get_utt2dur.sh $data
+fi
+if [ ! -f $noise/utt2dur ]; then
+  # get the duration of each noise file
+  utils/data/get_utt2dur.sh $noise
+fi
+
+mkdir -p $dir/log
+if [ $stage -le 0 ]; then
+  echo "$0: generate $num_kind_range kinds of noise range for each original wav"
+  $cmd $dir/log/generate_noise_range.log \
+    steps/nnet3/fvector/generate_noise_range.py \
+      --num-kind-range=$num_kind_range \
+      --min-additive-noise-len=$min_additive_noise_len \
+      --min-snr=$min_snr \
+      --max-snr=$max_snr \
+      --seed=$seed \
+      $data/utt2dur $noise/utt2dur $dir
+fi
+exit 0

From 969d31fddf154630aa9b7a7ec5df50b9a37a6baa Mon Sep 17 00:00:00 2001
From: Hang Lyu <hlyu@a11.clsp.jhu.edu>
Date: Thu, 22 Dec 2016 13:01:41 -0500
Subject: [PATCH 11/23] two version perturb signal

---
 .../fvector/generate_fixed_length_range.py    | 185 +++++++++++++++++
 .../fvector/generate_variable_length_range.py | 188 ++++++++++++++++++
 .../nnet3-fvector-perturb-signal.cc           | 179 +++++++++++++++++
 3 files changed, 552 insertions(+)
 create mode 100644 egs/wsj/s5/steps/nnet3/fvector/generate_fixed_length_range.py
 create mode 100644 egs/wsj/s5/steps/nnet3/fvector/generate_variable_length_range.py
 create mode 100644 src/fvectorbin/nnet3-fvector-perturb-signal.cc

diff --git a/egs/wsj/s5/steps/nnet3/fvector/generate_fixed_length_range.py b/egs/wsj/s5/steps/nnet3/fvector/generate_fixed_length_range.py
new file mode 100644
index 00000000000..90072c98a6a
--- /dev/null
+++ b/egs/wsj/s5/steps/nnet3/fvector/generate_fixed_length_range.py
@@ -0,0 +1,185 @@
+#!/usr/bin/env python
+
+# This script generates the range files for fvector.
+# This is the fixed-length version.
+# The format is <wav_t_start> <wav_t_end> <noise_uttid> <noise_t_start> <noise_t_end> <snr>
+
+# For <wav_t_start> <wav_t_end>
+# Except for the last fragment, the length will be a fixed value T.
+
+# For <noise_uttid>
+# It is randomly selected from the noises which are not shorter than --min-additive-noise-len
+
+# For <noise_t_start> <noise_t_end>
+# If the noise file is longer than the fixed value, we randomly select the start point and
+# the length will be the fixed value T.
+# If the noise file is shorter than T, we select the whole noise.
+
+# The <snr> control the rate of signal and noise. In the other word, scale the amplitude of noise.
+
+from __future__ import print_function
+import re, os, argparse, sys, math, warnings, random
+
+parser = argparse.ArgumentParser(description="Generate N noise range files for each original wav. The file "
+                                 "which created by this python code will be supplied to fixed-length add "
+                                 "additive noise program.",
+                                 epilog="Called by steps/nnet3/fvector/add_noise.sh")
+parser.add_argument("--num-kind-range", type=int, default=4,
+                    help="the number of noise range files")
+parser.add_argument("--min-additive-noise-len", type=float, default=2.0,
+                    help="the minimum duration/length of each noise file")
+parser.add_argument("--min-snr", type=int, default=0,
+                    help="the minimum Signal-to-Noise Rate, the default=0")
+parser.add_argument("--max-snr", type=int, default=-10,
+                    help="the maximum Signal-to-Noise Rate, the default=-10")
+parser.add_argument("--seed", type=int, default=-1,
+                    help="Seed for random number generator")
+
+# now the positional arguments
+parser.add_argument("wav_utt2dur",
+                    help="utt2dur file of the original wav to be used as input (format is: "
+                    "<utterance-id> <duration>")
+parser.add_argument("noise_utt2dur",
+                    help="utt2dur file of the noise wav to be used as input (format is: "
+                    "<utterance-id> <duration>")
+parser.add_argument("rangs_dir",
+                    help="Name of ranges directory, exp/fxvector/ranges")
+
+print(' '.join(sys.argv))
+
+args = parser.parse_args()
+
+## Check arguments
+if args.min_snr < args.max_snr:
+    sys.exit("For SNR, the less numerical value is, the larger noise is. So --min-snr bigger "
+             "than --max-snr in numerical value.")
+
+random.seed(args.seed)
+
+# deal with the original wav utt2dur
+# the information was stored in wav_utt_ids[], wav_lengths[] and wav_num_utts
+try:
+    f = open(args.wav_utt2dur, "r")
+except IOError:
+    sys.exit("Error opening wav_utt2dur file " + str(args.wav_utt2dur))
+wav_utt_ids = []
+wav_lengths = []
+for line in f:
+    a = line.split()
+    if len(a) != 2:
+        sys.exit("Bad line in wav_utt2dur file " + line)
+    if float(a[1]) < args.min_additive_noise_len:
+        sys.exit("ERROR: The wav length is shorter than --min-additive-noise-len")
+    wav_utt_ids.append(a[0])
+    wav_lengths.append(float(a[1]))
+f.close()
+wav_num_utts = len(wav_utt_ids)
+
+# deal with the noise wav utt2dur, removing the noise whose length < --min-additive-noise-len
+num_error = 0
+num_done = 0
+try:
+    f = open(args.noise_utt2dur, "r")
+except IOError:
+    sys.exit("Error opening noise_utt2dur file " + str(args.noise_utt2dur))
+noise_utt_ids = []
+noise_lengths = []
+for line in f:
+    a = line.split()
+    if len(a) != 2:
+        sys.exit("bad line in noise_utt2dur file " + line)
+    if float(a[1]) < args.min_additive_noise_len:
+        num_error += 1
+        continue
+    noise_utt_ids.append(a[0])
+    noise_lengths.append(float(a[1]))
+    num_done += 1
+f.close()
+noise_num_utts = len(noise_utt_ids)
+noise_str =  "Warning: There are " + str(num_error) + " noise files length smaller than " + \
+             "--min-additive-noise-len, we remove it from the noise list. Now, there are " + \
+             str(num_done) + " noise file."
+sys.stdout.write( noise_str + '\n')
+
+num_error = 0
+num_done = 0
+# generate the range file for each original wav file
+for i in range(0, wav_num_utts):
+    # decide the maximum number of noises which can be added to this wav
+    current_wav_len = wav_lengths[i]
+    max_num_additive_noise = int(current_wav_len / args.min_additive_noise_len)
+
+    if max_num_additive_noise > noise_num_utts:
+        print( "Warning: The number of noise files or the --min-additive-noise-len is too small" )
+        num_error += 1
+        continue
+
+    # We generate $num_kind_range ranges
+    for j in range(0, args.num_kind_range):
+        # select a number from [1 ... max_num_additive_noise]
+        num_additive_noise = random.randint(1, max_num_additive_noise)
+        # decide the length of each noise, minus 0.01 to prevent overstep
+        len_additive_noise = float('{:.2f}'.format(current_wav_len / num_additive_noise)) - 0.01
+
+        # create a file to record the ranges
+        try:
+            f = open(args.rangs_dir + "/" + str(wav_utt_ids[i]) + ".range." + str(j), "w")
+        except IOError:
+            sys.exit("Error open file " + args.rangs_dir + "/" + str(wav_utt_ids[i]) + ".range." + str(j))
+        # generate range file
+        # format: wav_t_start, wav_t_end, noise_name, noise_t_start, noise_t_end, snr
+        for k in range(0, num_additive_noise - 1):
+            wav_t_start = float('{:.2f}'.format(k * len_additive_noise))
+            wav_t_end = wav_t_start + len_additive_noise
+
+            noise_index = random.randrange(0, noise_num_utts)
+            current_noise_name = noise_utt_ids[noise_index]
+            current_noise_len = noise_lengths[noise_index]
+            if current_noise_len <= len_additive_noise:
+                noise_t_start = 0.0
+                noise_t_end = current_noise_len
+            else:
+                noise_start_bound = float('{:.2f}'.format(current_noise_len - len_additive_noise))
+                noise_t_start = float('{:.2f}'.format(random.uniform(0, noise_start_bound)))
+                noise_t_end = noise_t_start + len_additive_noise
+            # randint() includes both ends, so --min-snr == --max-snr also works
+            current_snr = random.randint(args.max_snr, args.min_snr)
+
+            print("{0} {1} {2} {3} {4} {5}".format(wav_t_start,
+                                                   wav_t_end,
+                                                   current_noise_name,
+                                                   noise_t_start,
+                                                   noise_t_end,
+                                                   current_snr),
+                  file=f)
+        # deal with the last noise, which covers the rest of the wav
+        k = num_additive_noise - 1
+        wav_t_start = float('{:.2f}'.format(k * len_additive_noise))
+        wav_t_end = current_wav_len
+
+        noise_index = random.randrange(0, noise_num_utts)
+        current_noise_name = noise_utt_ids[noise_index]
+        current_noise_len = noise_lengths[noise_index]
+
+        if current_noise_len <= (wav_t_end - wav_t_start):
+            noise_t_start = 0.0
+            noise_t_end = current_noise_len
+        else:
+            noise_start_bound = float('{:.2f}'.format(current_noise_len - wav_t_end + wav_t_start))
+            noise_t_start = float('{:.2f}'.format(random.uniform(0, noise_start_bound)))
+            noise_t_end = noise_t_start + wav_t_end - wav_t_start
+
+        current_snr = random.randint(args.max_snr, args.min_snr)
+
+        print("{0} {1} {2} {3} {4} {5}".format(wav_t_start,
+                                               wav_t_end,
+                                               current_noise_name,
+                                               noise_t_start,
+                                               noise_t_end,
+                                               current_snr),
+              file=f)
+
+        f.close()
+        num_done += 1
+
+print('''generate_fixed_length_range.py: finished generate the range files for all wav. Compare with our expect, it lacks %d files. Now we totally have %d noise range files.''' %(num_error, num_done) )
diff --git a/egs/wsj/s5/steps/nnet3/fvector/generate_variable_length_range.py b/egs/wsj/s5/steps/nnet3/fvector/generate_variable_length_range.py
new file mode 100644
index 00000000000..67d0fd0d5ad
--- /dev/null
+++ b/egs/wsj/s5/steps/nnet3/fvector/generate_variable_length_range.py
@@ -0,0 +1,188 @@
+#!/usr/bin/env python
+
+# This script generates the range files for fvector.
+# This is the variable-length version.
+# The format is <wav_t_start> <wav_t_end> <noise_uttid> <noise_t_start> <noise_t_end> <snr>
+
+# For <wav_t_start> <wav_t_end>
+# Except for the last fragment, the length will be random.
+
+# For <noise_uttid>
+# It is randomly selected from the noises which are not shorter than --min-additive-noise-len
+
+# For <noise_t_start> <noise_t_end>
+# A random start point is chosen in the noise file, and the fragment length is drawn between
+# --min-additive-noise-len and what is left of both the wav and the noise file.
+# The last fragment has exactly the length of the rest of the wav.
+
+# For <snr>, it was used to control the amplitude of noise
+
+from __future__ import print_function
+import re, os, argparse, sys, math, warnings, random
+
+parser = argparse.ArgumentParser(description="Generate N noise range files for each original wav. The file "
+                                 "which created by this python code will be supplied to variable-length "
+                                 "and additive noise program.",
+                                 epilog="Called by steps/nnet3/fvector/add_noise.sh")
+parser.add_argument("--num-kind-range", type=int, default=4,
+                    help="the number of noise range files")
+parser.add_argument("--min-additive-noise-len", type=float, default=2.0,
+                    help="the minimum duration/length of each noise file")
+parser.add_argument("--min-snr", type=int, default=0,
+                    help="the minimum Signal-to-Noise Rate, the default=0")
+parser.add_argument("--max-snr", type=int, default=-10,
+                    help="the maximum Signal-to-Noise Rate, the default=-10")
+parser.add_argument("--seed", type=int, default=-1,
+                    help="Seed for random number generator")
+
+# now the positional arguments
+parser.add_argument("wav_utt2dur",
+                    help="utt2dur file of the original wav to be used as input (format is: "
+                    "<utterance-id> <duration>")
+parser.add_argument("noise_utt2dur",
+                    help="utt2dur file of the noise wav to be used as input (format is: "
+                    "<utterance-id> <duration>")
+parser.add_argument("rangs_dir",
+                    help="Name of ranges directory, exp/fxvector/ranges")
+
+print(' '.join(sys.argv))
+
+args = parser.parse_args()
+
+## Check arguments
+if args.min_snr < args.max_snr:
+    sys.exit("For SNR, the less numerical value is, the larger noise is. So --min-snr bigger "
+             "than --max-snr in numerical value.")
+
+random.seed(args.seed)
+
+# deal with the original wav utt2dur
+# the information was stored in wav_utt_ids[], wav_lengths[] and wav_num_utts
+try:
+    f = open(args.wav_utt2dur, "r")
+except IOError:
+    sys.exit("Error opening wav_utt2dur file " + str(args.wav_utt2dur))
+wav_utt_ids = []
+wav_lengths = []
+for line in f:
+    a = line.split()
+    if len(a) != 2:
+        sys.exit("bad line in wav_utt2dur file " + line)
+    if float(a[1]) < args.min_additive_noise_len:
+        sys.exit("ERROR: The wav length is shorter than --min-additive-noise-len")
+    wav_utt_ids.append(a[0])
+    wav_lengths.append(float(a[1]))
+f.close()
+wav_num_utts = len(wav_utt_ids)
+
+# deal with the noise wav utt2dur, removing the noise whose length < --min-additive-noise-len
+num_error = 0
+num_done = 0
+try:
+    f = open(args.noise_utt2dur, "r")
+except IOError:
+    sys.exit("Error opening noise_utt2dur file " + str(args.noise_utt2dur))
+noise_utt_ids = []
+noise_lengths = []
+for line in f:
+    a = line.split()
+    if len(a) != 2:
+        sys.exit("bad line in noise_utt2dur file " + line)
+    if float(a[1]) < args.min_additive_noise_len:
+        num_error += 1
+        continue
+    noise_utt_ids.append(a[0])
+    noise_lengths.append(float(a[1]))
+    num_done += 1
+f.close()
+noise_num_utts = len(noise_utt_ids)
+noise_str =  "Warning: There are " + str(num_error) + " noise files length smaller than " + \
+             "--min-additive-noise-len, we remove it from the noise list. Now, there are " + \
+             str(num_done) + " noise file."
+sys.stdout.write( noise_str + '\n')
+
+num_error = 0
+num_done = 0
+# generate the range file for each original wav file
+for i in range(0, wav_num_utts):
+
+    # check the noise list has enough sample or not
+    current_wav_len = wav_lengths[i]
+    max_num_additive_noise = int(current_wav_len / args.min_additive_noise_len)
+
+    if max_num_additive_noise > noise_num_utts:
+        print( "Warning: The number of noise files or the --min-additive-noise-len is too small" )
+        num_error += 1
+        continue
+
+    # We generate $num_kind_range ranges
+    for j in range(0, args.num_kind_range):
+        # create a file to record the ranges
+        try:
+            f = open(args.rangs_dir + "/" + str(wav_utt_ids[i]) + ".range." + str(j), "w")
+        except IOError:
+            sys.exit("Error open file " + args.rangs_dir + "/" + str(wav_utt_ids[i]) + ".range." + str(j))
+        # generate range file
+        # format: wav_t_start, wav_t_end, noise_name, noise_t_start, noise_t_end, snr
+        the_rest = current_wav_len
+        wav_t_start = 0.0
+        wav_t_end = 0.0
+        while (the_rest > float(args.min_additive_noise_len)):
+            # firstly, we randomly choose a kind of noise and snr
+            noise_index = random.randrange(0, noise_num_utts)
+            current_noise_name = noise_utt_ids[noise_index]
+            current_noise_len = noise_lengths[noise_index]
+            current_snr = random.randint(args.max_snr, args.min_snr)  # both ends included
+
+            # Secondly, we randomly select a fragment of the noise file.
+            noise_start_bound = float('{:.2f}'.format(current_noise_len - float(args.min_additive_noise_len)))
+            noise_t_start = float('{:.2f}'.format(random.uniform(0, noise_start_bound)))
+            noise_end_lowerbound = float('{:.2f}'.format(noise_t_start + float(args.min_additive_noise_len)))
+            noise_end_upperbound = float('{:.2f}'.format(min((noise_t_start + the_rest), current_noise_len)))
+            noise_t_end = float('{:.2f}'.format(random.uniform(noise_end_lowerbound, noise_end_upperbound)))
+            current_noise_length = noise_t_end - noise_t_start
+
+            # Thirdly, we generate the start and end point of wav
+            wav_t_start = wav_t_end #the new start is the end of the last.
+            wav_t_end = wav_t_start + current_noise_length
+
+            # Fourthly, update the_rest
+            the_rest = the_rest - current_noise_length
+
+            # Fifthly, print
+            print("{0} {1} {2} {3} {4} {5}".format(wav_t_start,
+                                                   wav_t_end,
+                                                   current_noise_name,
+                                                   noise_t_start,
+                                                   noise_t_end,
+                                                   current_snr),
+                  file=f)
+        # deal with the remaining bit of the wav
+        # firstly, we randomly choose a kind of noise and snr
+        noise_index = random.randrange(0, noise_num_utts)
+        current_noise_name = noise_utt_ids[noise_index]
+        current_noise_len = noise_lengths[noise_index]
+        current_snr = random.randint(args.max_snr, args.min_snr)
+
+        # Secondly, we randomly select a fragment of the noise file.
+        noise_start_bound = float('{:.2f}'.format(current_noise_len - the_rest))
+        noise_t_start = float('{:.2f}'.format(random.uniform(0, noise_start_bound)))
+        noise_t_end = noise_t_start + the_rest
+        current_noise_length = noise_t_end - noise_t_start
+
+        # Thirdly, we generate the start and end point of wav
+        wav_t_start = wav_t_end #the new start is the end of the last.
+        wav_t_end = wav_t_start + current_noise_length
+
+        # Fourthly, print
+        print("{0} {1} {2} {3} {4} {5}".format(wav_t_start,
+                                               wav_t_end,
+                                               current_noise_name,
+                                               noise_t_start,
+                                               noise_t_end,
+                                               current_snr),
+              file=f)
+        f.close()
+        num_done += 1
+
+print('''generate_variable_length_range.py: finished generate the range files for all wav. Compare with our expect, it lacks %d files. Now we totally have %d noise range files.''' %(num_error, num_done) )
diff --git a/src/fvectorbin/nnet3-fvector-perturb-signal.cc b/src/fvectorbin/nnet3-fvector-perturb-signal.cc
new file mode 100644
index 00000000000..02e13656b74
--- /dev/null
+++ b/src/fvectorbin/nnet3-fvector-perturb-signal.cc
@@ -0,0 +1,179 @@
+// fvector/nnet3-fvector-perturb-signal.cc
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "feat/wave-reader.h"
+#include "feat/signal.h"
+
+namespace kaldi {
+
+struct NoiseController{  // one additive-noise range, i.e. one line of the --noise-range-file
+  BaseFloat wav_t_start_;  // start time of the range in the input wav (main() multiplies it by the sampling frequency)
+  BaseFloat wav_t_end_;  // end time of the range in the input wav
+  std::string noise_uttid_;  // key used by main() to look the noise up in the --add-noise-list table
+  BaseFloat noise_t_start_;  // start time of the segment taken from the noise recording
+  BaseFloat noise_t_end_;  // end time of the segment taken from the noise recording
+  BaseFloat snr_;  // <snr> field of the range file; main() uses it to scale the noise
+
+  NoiseController(BaseFloat wav_t_start, BaseFloat wav_t_end, std::string noise_uttid,
+                  BaseFloat noise_t_start, BaseFloat noise_t_end, BaseFloat snr):
+    wav_t_start_(wav_t_start), wav_t_end_(wav_t_end), noise_uttid_(noise_uttid),
+    noise_t_start_(noise_t_start), noise_t_end_(noise_t_end), snr_(snr) { }
+};
+
+
+}
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+
+    const char *usage =
+        "Perturb the wave files supplied via the specified noise-range file\n"
+        "Usage:  nnet3-fvector-perturb-signal [options...] <wav-in-rxfilename> "
+        "<wav-out-wxfilename>\n"
+        "e.g.\n"
+        "nnet3-fvector-perturb-signal --noise-range-file=uttid.range.n --add-noise-list="
+        "scp:noise.scp --input-channel=0 input.wav output.wav\n";
+
+    ParseOptions po(usage);
+
+    std::string noise_range_file;
+    std::string noise_list_rspecifier;
+    int32 input_channel = 0;
+
+    po.Register("noise-range-file", &noise_range_file,
+                "Provide a range file. We use the content in this file to control "
+                "the process of adding noise. The format of each line in this file "
+                ":<wav_t_start> <wav_t_end> <noise_utt_id> <noise_t_start> "
+                "<noise_t_end> <snr>");
+    po.Register("add-noise-list", &noise_list_rspecifier,
+                "There is a list of optional noise. It need to match the "
+                "--noise-range-file.");
+    po.Register("input-channel", &input_channel,
+                "Specifies the channel to be used in input file");
+
+    po.Read(argc, argv);
+    if (po.NumArgs() != 2) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string input_wave_file = po.GetArg(1);
+    std::string output_wave_file = po.GetArg(2);
+
+    // Generate the noise controller list: one entry per line of the range file.
+    std::vector<NoiseController> controller;
+    if (noise_range_file != "") {
+      std::ifstream fi(noise_range_file.c_str());
+      if (!fi) {
+        KALDI_ERR << "failed to open file " << noise_range_file;
+      }
+      BaseFloat wav_t_start, wav_t_end, noise_t_start, noise_t_end, snr;
+      std::string noise_uttid;
+      // Reading stops at end of file or at the first line that does not parse.
+      while (fi >> wav_t_start >> wav_t_end >> noise_uttid >> noise_t_start
+                >> noise_t_end >> snr) {
+        controller.push_back(NoiseController(wav_t_start, wav_t_end, noise_uttid,
+                                             noise_t_start, noise_t_end, snr));
+      }
+    }
+
+    // Read the whole input wave; fail loudly instead of using empty data.
+    WaveData input_wave;
+    {
+      WaveHolder waveholder;
+      Input ki(input_wave_file);
+      if (!waveholder.Read(ki.Stream()))
+        KALDI_ERR << "failed to read wave file " << input_wave_file;
+      input_wave = waveholder.Value();
+    }
+
+    // about input wav
+    const Matrix<BaseFloat> &input_matrix = input_wave.Data();
+    BaseFloat samp_freq_input = input_wave.SampFreq();
+    int32 num_samp_input = input_matrix.NumCols(),  // #samples in the input
+          num_input_channel = input_matrix.NumRows();  // #channels in the input
+    KALDI_VLOG(1) << "Sampling frequency of input: " << samp_freq_input
+                  << ", the number of samples: " << num_samp_input
+                  << ", the number of channels: " << num_input_channel;
+    KALDI_ASSERT(input_channel >= 0 && input_channel < num_input_channel);
+    Vector<BaseFloat> input(num_samp_input);
+    input.CopyRowFromMat(input_matrix, input_channel);
+
+    // new output vector: starts as a copy of the input, noise is added in place
+    Vector<BaseFloat> output(input);
+
+    // about noise list
+    RandomAccessTableReader<WaveHolder> noise_reader(noise_list_rspecifier);
+
+    // add noise, one range of the controller list at a time
+    for (size_t i = 0; i < controller.size(); ++i) {
+      const WaveData &noise_wav = noise_reader.Value(controller[i].noise_uttid_);
+      BaseFloat samp_freq_noise = noise_wav.SampFreq();
+      KALDI_ASSERT(samp_freq_input == samp_freq_noise);
+
+      const Matrix<BaseFloat> &noise_matrix = noise_wav.Data();
+      int32 num_samp_noise = noise_matrix.NumCols();
+      Vector<BaseFloat> noise(num_samp_noise);
+      noise.CopyRowFromMat(noise_matrix, 0);
+
+      int32 input_start_point = samp_freq_input * controller[i].wav_t_start_;
+      int32 input_end_point = samp_freq_input * controller[i].wav_t_end_ - 1;
+      int32 noise_start_point = samp_freq_noise * controller[i].noise_t_start_;
+      int32 noise_end_point = samp_freq_noise * controller[i].noise_t_end_ - 1;
+      BaseFloat snr = controller[i].snr_;
+
+      SubVector<BaseFloat> input_part(input, input_start_point,
+                                      input_end_point - input_start_point + 1);
+      SubVector<BaseFloat> noise_part(noise, noise_start_point,
+                                      noise_end_point - noise_start_point + 1);
+      if (noise_part.Dim() == 0 && input_part.Dim() > 0)
+        KALDI_ERR << "empty noise segment for " << controller[i].noise_uttid_;
+      // Cover the input segment with the noise segment: repeat it from its
+      // start when it is too short, truncate it when it is too long.
+      Vector<BaseFloat> selected_noise(input_part.Dim());
+      for (int32 offset = 0, len; offset < selected_noise.Dim(); offset += len) {
+        len = selected_noise.Dim() - offset;  // samples still to be filled
+        if (len > noise_part.Dim()) len = noise_part.Dim();
+        selected_noise.Range(offset, len).CopyFromVec(noise_part.Range(0, len));
+      }
+
+      // SNR(dB) = 10 * log10(input_energy / (scale^2 * noise_energy)), hence
+      // scale = sqrt(input_energy / noise_energy / 10^(snr / 10)).
+      BaseFloat input_energy = VecVec(input_part, input_part);
+      BaseFloat noise_energy = VecVec(selected_noise, selected_noise);
+      if (noise_energy <= 0.0) continue;  // all-zero noise: nothing to add
+      BaseFloat scale_factor = sqrt(input_energy / noise_energy / pow(10.0, snr / 10.0));
+      output.Range(input_start_point, input_part.Dim()).AddVec(scale_factor, selected_noise);
+    }
+
+    Matrix<BaseFloat> out_matrix(1, num_samp_input);
+    out_matrix.CopyRowsFromVec(output);
+
+    WaveData out_wave(samp_freq_input, out_matrix);
+    Output ko(output_wave_file, false);
+    out_wave.Write(ko.Stream());
+
+    return 0;
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+

From 51856f78bbddebf11ad0c3573d35adceaf7a7482 Mon Sep 17 00:00:00 2001
From: Hang Lyu <hlyu@a11.clsp.jhu.edu>
Date: Wed, 28 Dec 2016 22:35:09 -0500
Subject: [PATCH 12/23] modify the python files to generate noise_range; fix
 the binary of perturb-signal; make a simple bash script

---
 egs/wsj/s5/steps/nnet3/fvector/add_noise.sh   |  64 ++++
 .../fvector/generate_fixed_length_range.py    | 116 +++----
 .../nnet3/fvector/generate_noise_range.py     | 304 ++++++++++++++----
 .../fvector/generate_variable_length_range.py | 112 ++++---
 .../nnet3-fvector-perturb-signal.cc           | 167 +++++-----
 5 files changed, 520 insertions(+), 243 deletions(-)
 create mode 100644 egs/wsj/s5/steps/nnet3/fvector/add_noise.sh

diff --git a/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh b/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
new file mode 100644
index 00000000000..4a7169a3b42
--- /dev/null
+++ b/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+# Copyright 2016  Johns Hopkins University (Author: Daniel Povey)
+# Apache 2.0
+
+# Begin Configuration section.
+stage=0
+cmd=run.pl
+nj=4
+# Options passed on to generate_noise_range.py.
+min_additive_noise_len=2.0       # the minimum duration of each noise file
+num_kind_range=4                 # the number of kinds of noise ranges
+min_snr=-5                       # upper end of the range (max_snr, min_snr) the snr is drawn from
+max_snr=-15                      # lower end of the range (max_snr, min_snr) the snr is drawn from
+seed=-1                          # set the random seed
+variable_len_additive_noise=true # If true, generate the variable-length range files
+                                 # If false, generate the fixed-length range files
+# End Configuration options.
+
+echo "$0 $@" # Print the command line for logging
+
+[ -f path.sh ] && . ./path.sh # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# != 3 ]; then
+  echo "usage: steps/nnet3/fvector/add_noise.sh <data-dir> <noise-dir> <range-dir>"
+  echo "e.g.:  steps/nnet3/fvector/add_noise.sh data/train data/noise ranges"
+  echo "main options (for others, see top of script file)"
+  echo "  --min-additive-noise-len <second>                # limit the minimum length of noise"
+  echo "  --num-kind-range <n>                             # number of noise range kinds"
+  echo "  --variable-len-additive-noise (true|false)       # decide fixed/variable version"
+  echo "  --nj <nj>                                        # number of parallel jobs"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs"
+  exit 1
+fi
+
+data=$1  # contain wav.scp
+noise=$2 # contain noise.scp 
+dir=$3   # eg: ranges/
+
+if [ ! -f $data/utt2dur ]; then
+  # get original clean wav's duration
+  utils/data/get_utt2dur.sh $data || exit 1
+fi
+
+if [ ! -f $noise/utt2dur ]; then
+  # get the duration of each noise file
+  utils/data/get_utt2dur.sh $noise || exit 1
+fi
+
+mkdir -p $dir/log
+if [ $stage -le 0 ]; then
+  echo "$0: generate $num_kind_range kinds of noise range for each original wav"
+  $cmd $dir/log/generate_noise_range.log \
+    steps/nnet3/fvector/generate_noise_range.py \
+      --num-kind-range=$num_kind_range \
+      --min-additive-noise-len=$min_additive_noise_len \
+      --min-snr=$min_snr \
+      --max-snr=$max_snr \
+      --variable-len-additive-noise $variable_len_additive_noise \
+      --seed=$seed \
+      $data/utt2dur $noise/utt2dur $dir || exit 1
+fi
+
+exit 0
diff --git a/egs/wsj/s5/steps/nnet3/fvector/generate_fixed_length_range.py b/egs/wsj/s5/steps/nnet3/fvector/generate_fixed_length_range.py
index 90072c98a6a..84a41541163 100644
--- a/egs/wsj/s5/steps/nnet3/fvector/generate_fixed_length_range.py
+++ b/egs/wsj/s5/steps/nnet3/fvector/generate_fixed_length_range.py
@@ -2,10 +2,12 @@
 
 # The function use to generate range file for fvector
 # This is the fixed-length version
-# The format is <wav_t_start> <wav_t_end> <noise_uttid> <noise_t_start> <noise_t_end> <snr>
+# Each line of the range-file corresponds to a kind of perturbed wav. In each line,
+# we use commas to separate the different addnoise ranges. The format of each addnoise
+# range is <wav_t_start>:<wav_t_end>:<noise_uttid>:<noise_t_start>:<noise_t_end>:<snr>
 
 # For <wav_t_start> <wav_t_end>
-# We except the last fragement, the length will be a fixed value T.
+# Except for the last fragment, the length will be a fixed value T.
 
 # For <noise_uttid>
 # It is randomly selected from noise list, which is longer than --min-additive-noise-len
@@ -16,16 +18,18 @@
 # If the noise file is shorter than T. We select the whole noise.
 
 # The <snr> control the rate of signal and noise. In the other word, scale the amplitude of noise.
+# The snr will be randomly selected from the range (max-snr, min-snr).
 
 from __future__ import print_function
 import re, os, argparse, sys, math, warnings, random
 
-parser = argparse.ArgumentParser(description="Generate N noise range files for each original wav. The file"
-                                 "which created by this python code will be supplied to fixed-length add "
+parser = argparse.ArgumentParser(description="Generate a noise range files which contains "
+                                 "N lines corresponding to the number of kinds for each original wav. "
+                                 "The file created by this python code will be supplied to fixed-length add "
                                  "additive noise program.",
                                  epilog="Called by steps/nnet3/fvector/add_noise.sh")
 parser.add_argument("--num-kind-range", type=int, default=4,
-                    help="the number of noise range files")
+                    help="the number of expected addnoise kinds")
 parser.add_argument("--min-additive-noise-len", type=float, default=2.0,
                     help="the minimum duration/length of each noise file")
 parser.add_argument("--min-snr", type=int, default=0,
@@ -36,13 +40,13 @@
                     help="Seed for random number generator")
 
 # now the positional arguments
-parser.add_argument("wav_utt2dur",
-                    help="utt2dur file of the original wav to be used as input (format is: "
+parser.add_argument("wav2dur",
+                    help="wav2dur file of the original wav to be used as input (format is: "
                     "<utterance-id> <duration>")
-parser.add_argument("noise_utt2dur",
-                    help="utt2dur file of the noise wav to be used as input (format is: "
+parser.add_argument("noise2dur",
+                    help="noise2dur file of the noise wav to be used as input (format is: "
                     "<utterance-id> <duration>")
-parser.add_argument("rangs_dir",
+parser.add_argument("range_dir",
                     help="Name of ranges directory, exp/fxvector/ranges")
 
 print(' '.join(sys.argv))
@@ -57,108 +61,112 @@
 random.seed(args.seed)
 
 # deal with the original wav utt2dur
-# the information was stored in wav_utt_ids[], wav_lengths[] and wav_num_utts
-f = open(args.wav_utt2dur, "r")
+# the information was stored in wav_ids[], wav_lengths[] and wav_num_utts
+f = open(args.wav2dur, "r")
 if f is None:
-    sys.exit("Error opening wav_utt2dur file " + str(args.wav_utt2dur))
-wav_utt_ids = []
+    sys.exit("Error opening wav2dur file " + str(args.wav2dur))
+wav_ids = []
 wav_lengths = []
 for line in f:
     a = line.split()
     if len(a) != 2:
-        sys.exit("Bad line in wav_utt2dur file " + line)
+        sys.exit("Bad line in wav2dur file " + line)
     if float(a[1]) < args.min_additive_noise_len:
 	sys.exit("ERROR: The wav length is shorter than --min-additive-noise-len")
-    wav_utt_ids.append(a[0])
+    wav_ids.append(a[0])
     wav_lengths.append(float(a[1]))
 f.close()
 
-wav_num_utts = len(wav_utt_ids)
+wav_num_utts = len(wav_ids)
 
 # deal with the noise wav utt2dur
 # remove the noise whose length < --min-additive-noise-len
 num_error = 0
 num_done = 0
-f = open(args.noise_utt2dur, "r")
+f = open(args.noise2dur, "r")
 if f is None:
-    sys.exit("Error opening wav_utt2dur file " + str(args.wav_utt2dur))
-noise_utt_ids = []
+    sys.exit("Error opening noise2dur file " + str(args.noise2dur))
+noise_ids = []
 noise_lengths = []
 for line in f:
     a = line.split()
     if len(a) != 2:
-        sys.exit("bad line in noise_utt2dur file " + line);
+        sys.exit("Bad line in noise2dur file " + line);
     if float(a[1]) < args.min_additive_noise_len:
         num_error += 1
         continue
-    noise_utt_ids.append(a[0])
+    noise_ids.append(a[0])
     noise_lengths.append(float(a[1]))
     num_done += 1
 f.close()
-noise_num_utts = len(noise_utt_ids)
+noise_num_utts = len(noise_ids)
 noise_str =  "Warning: There are " + str(num_error) + " noise files length smaller than " + \
              "--min-additive-noise-len, we remove it from the noise list. Now, there are " + \
              str(num_done) + " noise file."
 sys.stdout.write( noise_str + '\n')
 
-num_error = 0
-num_done = 0
-# generate the range file for each original wav file
+num_fixed_error = 0
+num_fixed_done = 0
+
 for i in range(0, wav_num_utts):
     # decide the number of noises which will be add to 
     current_wav_len = wav_lengths[i]
     max_num_additive_noise = int(current_wav_len / args.min_additive_noise_len)
-    
+   
     if max_num_additive_noise > noise_num_utts:
         print( "Warning: The number of noise files or the --min-additive-noise-len is too small" )
-        num_error += 1
+        num_fixed_error += 1
         continue
 
+    # create a file to record the ranges
+    f = open(args.range_dir + "/" + str(wav_ids[i]) + ".noiserange", "w")
+    if f is None:
+        sys.exit("Error open file " + args.range_dir + "/" + str(wav_ids[i]) + ".noiserange")
+
     # We generate $num_kind_range ranges
     for j in range(0, args.num_kind_range):
+        # print the perturbed wav id in the beginning of line
+        print("{0}-{1}".format(wav_ids[i], "perturbed-"+str(j+1)), end=" ", file=f)
+            
         # select a number from [1 ... max_num_additive_noise]
         num_additive_noise = random.randint(1, max_num_additive_noise)
     
         # decide the length of each noise, minus 0.01 to prevent overstep
-        len_additive_noise = float('{:.2f}'.format(current_wav_len / num_additive_noise)) - 0.01
-
-        # create a file to record the ranges
-        f = open(args.rangs_dir + "/" + str(wav_utt_ids[i]) + ".range." + str(j), "w")
-        if f is None:
-            sys.exit("Error open file " + args.rangs_dir + "/" + str(wav_utt_ids[i]) + ".ranges." + str(j))
-        # generate range file
-        # format: wav_t_start, wav_t_end, noise_name, noise_t_start, noise_t_end, snr
+        additive_noise_len = float('{:.2f}'.format(current_wav_len / num_additive_noise)) - 0.01
+
+        # generate one line of file
+        # format: wav_t_start:wav_t_end:noise_name:noise_t_start:noise_t_end:snr,
         for k in range(0, num_additive_noise - 1):
-            wav_t_start = float('{:.2f}'.format(k * len_additive_noise))
-            wav_t_end = wav_t_start + len_additive_noise
+            wav_t_start = float('{:.2f}'.format(k * additive_noise_len))
+            wav_t_end = wav_t_start + additive_noise_len
 			
             noise_index = random.randrange(0, noise_num_utts)
-            current_noise_name = noise_utt_ids[noise_index]
+            current_noise_name = noise_ids[noise_index]
             current_noise_len = noise_lengths[noise_index]
-            if current_noise_len <= len_additive_noise:
-	        noise_t_start = 0.0
-		noise_t_end = current_noise_len
+            if current_noise_len <= additive_noise_len:
+                noise_t_start = 0.0
+                noise_t_end = current_noise_len
 	    else :
-	        noise_start_bound = float('{:.2f}'.format(current_noise_len - len_additive_noise))
+	        noise_start_bound = float('{:.2f}'.format(current_noise_len - additive_noise_len))
                 noise_t_start = float('{:.2f}'.format(random.uniform(0, noise_start_bound)))
-                noise_t_end = noise_t_start + len_additive_noise
+                noise_t_end = noise_t_start + additive_noise_len
 
             current_snr = random.randrange(args.max_snr, args.min_snr)
 
-            print("{0} {1} {2} {3} {4} {5}".format(wav_t_start,
+            print("{0}:{1}:{2}:{3}:{4}:{5}".format(wav_t_start,
                                                    wav_t_end,
                                                    current_noise_name,
                                                    noise_t_start,
                                                    noise_t_end,
                                                    current_snr),
-                  file=f)
-	    # deal with the last noise, which cover the rest
+                  end=",",file=f)
+	# deal with the last noise, which cover the rest
         k = num_additive_noise - 1
-	wav_t_start = float('{:.2f}'.format(k * len_additive_noise))
+	wav_t_start = float('{:.2f}'.format(k * additive_noise_len))
         wav_t_end = current_wav_len
 
 	noise_index = random.randrange(0, noise_num_utts)
-        current_noise_name = noise_utt_ids[noise_index]
+        current_noise_name = noise_ids[noise_index]
         current_noise_len = noise_lengths[noise_index]
 
 	if current_noise_len <= (wav_t_end - wav_t_start):
@@ -171,15 +179,13 @@
 		
 	current_snr = random.randrange(args.max_snr, args.min_snr)
 
-        print("{0} {1} {2} {3} {4} {5}".format(wav_t_start,
+        print("{0}:{1}:{2}:{3}:{4}:{5}".format(wav_t_start,
                                                wav_t_end,
                                                current_noise_name,
                                                noise_t_start,
                                                noise_t_end,
                                                current_snr),
               file=f)
-		
-        f.close()
-	num_done += 1
-		
-print('''generate_fixed_length_range.py: finished generate the range files for all wav. Compare with our expect, it lacks %d files. Now we totally have %d noise range files.''' %(num_error, num_done) )
+	num_fixed_done += 1
+    f.close()
+print('''generate_fixed_length_range.py: finished generate the range files for all wav. Compare with our expect, it lacks %d files. Now we totally have %d noise range files.''' %(num_fixed_error, num_fixed_done) )
diff --git a/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py b/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py
index db4d35a4325..fa42e030e85 100644
--- a/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py
+++ b/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py
@@ -1,29 +1,67 @@
 #!/usr/bin/env python
 
+# This script is used to generate the range-file for fvector.
+# Each line of the range-file corresponds to a kind of perturbed wav. In each line,
+# we use commas to separate the different addnoise ranges. The format of each addnoise
+# range is <wav_t_start>:<wav_t_end>:<noise_uttid>:<noise_t_start>:<noise_t_end>:<snr>
+# The lines which start with an asterisk (*) mark the differences between the two versions.
+
+# For the fixed-length version:
+# *For <wav_t_start> <wav_t_end>
+# *Except for the last fragment, the length will be a fixed value T.
+# For <noise_uttid>
+# It is randomly selected from the noise list; noises shorter than --min-additive-noise-len are excluded.
+# *For <noise_t_start> <noise_t_end>
+# *If the noise file is longer than the fixed value T, we randomly select the start point and
+# *the length will be the fixed value T.
+# *If the noise file is shorter than T, we select the whole noise.
+# The <snr> controls the ratio of signal to noise. In other words, it scales the amplitude of the noise.
+# The snr will be randomly selected from the range (max-snr, min-snr).
+
+# For the variable-length version:
+# *For <wav_t_start> <wav_t_end>
+# *Except for the last fragment, the length will be random.
+# For <noise_uttid>
+# It is randomly selected from the noise list; noises shorter than --min-additive-noise-len are excluded.
+# *For <noise_t_start> <noise_t_end>
+# *If the noise file is longer than the wav length, we randomly select the start point and
+# *the length will be the same as the wav length.
+# *If the noise file is shorter than the wav length, we select the whole noise.
+# For <snr>, it is used to control the amplitude of the noise.
+# It will be randomly selected from the range (max-snr, min-snr).
+
+
 from __future__ import print_function
 import re, os, argparse, sys, math, warnings, random
 
-parser = argparse.ArgumentParser(description="Generate n kinds of noise range for each original wav"
-                                 epilog="Called by steps/nnet3/fvector/lh_add_noise.sh")
+parser = argparse.ArgumentParser(description="Generate a noise range-file which contains "
+                                 "N lines corresponding to the number of kinds for each original wav. "
+                                 "The file which created by this python code will be supplied to "
+                                 "add additive noise program.",
+                                 epilog="Called by steps/nnet3/fvector/add_noise.sh")
 parser.add_argument("--num-kind-range", type=int, default=4,
-                    help="the number of kinds of noise ranges")
+                    help="the number of expected addnoise kinds")
 parser.add_argument("--min-additive-noise-len", type=float, default=2.0,
-                    help="the minimum duration of each noise file")
-parser.add_argument("--min-snr", type=int, default=0,
+                    help="the minimum duration/length of each noise file")
+parser.add_argument("--min-snr", type=int, default=-5,
                     help="the minimum Signal-to-Noise Rate, the default=0")
-parser.add_argument("--max-snr", type=int, default=-10,
+parser.add_argument("--max-snr", type=int, default=-15,
                     help="the maximum Signal-to-Noise Rate, the default=-10")
 parser.add_argument("--seed", type=int, default=-1,
                     help="Seed for random number generator")
+parser.add_argument("--variable-len-additive-noise", type=str,
+                    help="If true, generate the variable-length range files for each original wavform file."
+                    "If false, generate the fixed-length range files for each original wavform file.",
+                    default="false", choices = ["false", "true"])
 
 # now the positional arguments
-parser.add_argument("wav_utt2dur",
-                    help="utt2dur file of the original wav to be used as input (format is: "
+parser.add_argument("wav2dur",
+                    help="wav2dur file of the original wav to be used as input (format is: "
                     "<utterance-id> <duration>")
-parser.add_argument("noise_utt2dur",
-                    help="utt2dur file of the noise wav to be used as input (format is: "
+parser.add_argument("noise2dur",
+                    help="noise2dur file of the noise wav to be used as input (format is: "
                     "<utterance-id> <duration>")
-parser.add_argument("rangs_dir",
+parser.add_argument("range_dir",
                     help="Name of ranges directory, exp/fxvector/ranges")
 
 print(' '.join(sys.argv))
@@ -38,90 +76,228 @@
 random.seed(args.seed)
 
 # deal with the original wav utt2dur
-f = open(args.wav_utt2dur, "r")
+# the information was stored in wav_ids[], wav_lengths[] and wav_num_utts
+f = open(args.wav2dur, "r")
 if f is None:
-    sys.exit("Error opening wav_utt2dur file " + str(args.wav_utt2dur))
-wav_utt_ids = []
+    sys.exit("Error opening wav2dur file " + str(args.wav2dur))
+wav_ids = []
 wav_lengths = []
 for line in f:
     a = line.split()
     if len(a) != 2:
-        sys.exit("bad line in wav_utt2dur file " + line)
-    wav_utt_ids.append(a[0])
-    wav_lengths.append(a[1])
+        sys.exit("Bad line in wav2dur file " + line)
+    if float(a[1]) < args.min_additive_noise_len:
+	sys.exit("ERROR: The wav length is shorter than --min-additive-noise-len")
+    wav_ids.append(a[0])
+    wav_lengths.append(float(a[1]))
 f.close()
 
-wav_num_utts = len(wav_utt_ids)
+wav_num_utts = len(wav_ids)
 
 # deal with the noise wav utt2dur
-f = open(args.noise_utt2dur, "r")
+# remove the noise whose length < --min-additive-noise-len
+num_error = 0
+num_done = 0
+f = open(args.noise2dur, "r")
 if f is None:
-    sys.exit("Error opening wav_utt2dur file " + str(args.wav_utt2dur))
-noise_utt_ids = []
+    sys.exit("Error opening noise2dur file " + str(args.noise2dur))
+noise_ids = []
 noise_lengths = []
 for line in f:
     a = line.split()
     if len(a) != 2:
-        sys.exit("bad line in noise_utt2dur file " + line);
-    if a[1] <  args.min_additive_noise_len:
-        sys.exit("bad line in noise_utt2dur file " + line);
-    noise_utt_ids.append(a[0])
-    noise_lengths.append(a[1])
+        sys.exit("Bad line in noise2dur file " + line);
+    if float(a[1]) < args.min_additive_noise_len:
+        num_error += 1
+        continue
+    noise_ids.append(a[0])
+    noise_lengths.append(float(a[1]))
+    num_done += 1
 f.close()
+noise_num_utts = len(noise_ids)
+noise_str =  "Warning: There are " + str(num_error) + " noise files length smaller than " + \
+             "--min-additive-noise-len, we remove it from the noise list. Now, there are " + \
+             str(num_done) + " noise file."
+sys.stdout.write( noise_str + '\n')
 
-noise_num_utts = len(noise_utt_ids)
+# This function generates the fixed-length range files
+def GenerateFixedLengthRangeFile():
+    num_fixed_error = 0
+    num_fixed_done = 0
 
-# generate the range file for each original wav file
-for i in range(0, wav_num_utts):
-   
-    # decide the number of noises which will be add to 
-    current_wav_len = wav_lengths[i]
-    max_num_additive_noise = int(current_wav_len / args.min_additive_noise_len)
-    upperbound_num_additive_noise = min(max_num_additive_noise, noise_num_utts)
-
-    # select a number from [1 ... upperbound_num_additive_noise]
-    num_additive_noise = random.randrange(1, upperbound_num_additive_noise + 1)
+    for i in range(0, wav_num_utts):
+        # decide the number of noises which will be add to 
+        current_wav_len = wav_lengths[i]
+        max_num_additive_noise = int(current_wav_len / args.min_additive_noise_len)
     
-    # decide the length of each noise, minus 0.01 to prevent overstep
-    len_additive_noise = float('{:.2f}'.format(current_wav_len / num_additive)) - 0.01
+        if max_num_additive_noise > noise_num_utts:
+            print( "Warning: The number of noise files or the --min-additive-noise-len is too small" )
+            num_fixed_error += 1
+            continue
 
-    # We generate $num_kind_range ranges
-    for j in range(0, args.num_kind_range):
- 
         # create a file to record the ranges
-        f = open(args.rangs_dir + "/" + str(wav_utt_ids[i]) + ".range." + str(j), "w")
+        f = open(args.range_dir + "/" + str(wav_ids[i]) + ".noiserange", "w")
         if f is None:
-            sys.exit("Error open file " + args.rangs_dir + "/" + str(wav_utt_ids[i]) + ".ranges." + str(j))
-        
-        # generate range file
-        # format: wav_t_start, wav_t_end, noise_name, noise_t_start, noise_t_end, snr
-        for k in range(0, num_additive_noise):
-            wav_t_start = flat('{:.2f}'.format(k * len_additive_noise))
-            
-            noise_index = random.randrange(0, noise_num_utts)
-            current_noise_name = noise_utt_ids[noise_index]
-            current_noise_len = noise_lengths[noise_index]
-            
-            upperbound_add_len = min(len_additive_noise, current_noise_len)
-            current_add_len = float('{:.2f}'.format(random.randrange(0, upperbound_add_len, 0.01)))
+            sys.exit("Error open file " + args.range_dir + "/" + str(wav_ids[i]) + ".noiserange")
+
+        # We generate $num_kind_range ranges
+        for j in range(0, args.num_kind_range):
+            # print the perturbed wav id in the beginning of line
+            print("{0}-{1}".format(wav_ids[i], "perturbed-"+str(j+1)), end=" ", file=f)
             
-            noise_start_bound = float('{:.2f}'.format(current_noise_len - current_add_len))
-            noise_t_start = float('{:.2f}'.format(random.randrange(0, noise_start_bound)))
-            noise_t_end = noise_t_start + current_add_len
+            # select a number from [1 ... max_num_additive_noise]
+            num_additive_noise = random.randint(1, max_num_additive_noise)
+    
+            # decide the length of each noise, minus 0.01 to prevent overstep
+            additive_noise_len = float('{:.2f}'.format(current_wav_len / num_additive_noise)) - 0.01
 
-            wav_t_end = wav_t_start + current_add_len
+            # generate one line of file
+            # format: wav_t_start:wav_t_end:noise_name:noise_t_start:noise_t_end:snr,
+            for k in range(0, num_additive_noise - 1):
+                wav_t_start = float('{:.2f}'.format(k * additive_noise_len))
+                wav_t_end = wav_t_start + additive_noise_len
+			
+                noise_index = random.randrange(0, noise_num_utts)
+                current_noise_name = noise_ids[noise_index]
+                current_noise_len = noise_lengths[noise_index]
+                if current_noise_len <= additive_noise_len:
+	            noise_t_start = 0.0
+		    noise_t_end = current_noise_len
+	        else :
+	            noise_start_bound = float('{:.2f}'.format(current_noise_len - additive_noise_len))
+                    noise_t_start = float('{:.2f}'.format(random.uniform(0, noise_start_bound)))
+                    noise_t_end = noise_t_start + additive_noise_len
 
-            current_snr = random.randrange(args.max_snr, args.min_snr)
+                current_snr = random.randrange(args.max_snr, args.min_snr)
 
-            print("{0} {1} {2} {3} {4} {5} {6}".format(wav_t_start,
+                print("{0}:{1}:{2}:{3}:{4}:{5}".format(wav_t_start,
                                                        wav_t_end,
                                                        current_noise_name,
                                                        noise_t_start,
                                                        noise_t_end,
-                                                       current_snr,
+                                                       current_snr),
+                      end=",",file=f)
+	    # deal with the last noise, which cover the rest
+            k = num_additive_noise - 1
+	    wav_t_start = float('{:.2f}'.format(k * additive_noise_len))
+            wav_t_end = current_wav_len
+
+	    noise_index = random.randrange(0, noise_num_utts)
+            current_noise_name = noise_ids[noise_index]
+            current_noise_len = noise_lengths[noise_index]
+
+	    if current_noise_len <= (wav_t_end - wav_t_start):
+	        noise_t_start = 0.0
+	        noise_t_end = current_noise_len
+	    else :
+	        noise_start_bound = float('{:.2f}'.format(current_noise_len - wav_t_end + wav_t_start))
+                noise_t_start = float('{:.2f}'.format(random.uniform(0, noise_start_bound)))
+                noise_t_end = noise_t_start + wav_t_end - wav_t_start		
+		
+	    current_snr = random.randrange(args.max_snr, args.min_snr)
+
+            print("{0}:{1}:{2}:{3}:{4}:{5}".format(wav_t_start,
+                                                   wav_t_end,
+                                                   current_noise_name,
+                                                   noise_t_start,
+                                                   noise_t_end,
+                                                   current_snr),
                   file=f)
+	    num_fixed_done += 1
         f.close()
+    print('''generate_fixed_length_range.py: finished generate the range files for all wav. Compare with our expect, it lacks %d files. Now we totally have %d noise range files.''' %(num_fixed_error, num_fixed_done) )
+
+# This function generates the variable-length range files
+def GenerateVariableLengthRangeFile():
+    num_variable_error = 0
+    num_variable_done = 0
+
+    for i in range(0, wav_num_utts):
+
+        # check the noise list has enough sample or not
+        current_wav_len = wav_lengths[i]
+        max_num_additive_noise = int(current_wav_len / args.min_additive_noise_len)
+    
+        if max_num_additive_noise > noise_num_utts:
+            print( "Warning: The number of noise files or the --min-additive-noise-len is too small" )
+            num_variable_error += 1
+            continue
+
+        # create a file to record the ranges
+        f = open(args.range_dir + "/" + str(wav_ids[i]) + ".noiserange", "w")
+        if f is None:
+            sys.exit("Error open file " + args.range_dir + "/" + str(wav_ids[i]) + ".noiserange")
         
-print("generate_noise_range.py: finished generate the range files for all wav")        
+        # We generate $num_kind_range ranges
+        for j in range(0, args.num_kind_range):
+            # print the perturbed wav id in the beginning of line
+            print("{0}-{1}".format(wav_ids[i], "perturbed-"+str(j+1)), end=" ", file=f)
 
+            # generate range file
+            # format: wav_t_start:wav_t_end:noise_name:noise_t_start:noise_t_end:snr,
+            the_rest = current_wav_len
+            wav_t_start = 0.0
+            wav_t_end = 0.0
+            while (the_rest > float(args.min_additive_noise_len)):
+	        # firstly, we randomly choose a kind of noise and snr
+	        noise_index = random.randrange(0, noise_num_utts)
+                current_noise_name = noise_ids[noise_index]
+                current_noise_len = noise_lengths[noise_index]
+	        current_snr = random.randrange(args.max_snr, args.min_snr)
+			
+                # Secondly, we randomly select a fragement of the noise file.
+                noise_start_bound = float('{:.2f}'.format(current_noise_len - float(args.min_additive_noise_len)))
+                noise_t_start = float('{:.2f}'.format(random.uniform(0, noise_start_bound)))
+	        noise_end_upperbound = float('{:.2f}'.format(noise_t_start + float(args.min_additive_noise_len)))
+	        noise_end_lowerbound = float('{:.2f}'.format(min((noise_t_start + the_rest), current_noise_len)))
+                noise_t_end = float('{:.2f}'.format(random.uniform(noise_end_upperbound, noise_end_lowerbound)))
+	        current_noise_length = noise_t_end - noise_t_start
+			
+	        # Thirdly, we generate the start and end point of wav
+                wav_t_start = wav_t_end #the new start is the end of the last.
+	        wav_t_end = wav_t_start + current_noise_length
+			
+	        # Forthly, update the_rest
+	        the_rest = the_rest - current_noise_length
+	    
+                # Fifthly, print
+                print("{0}:{1}:{2}:{3}:{4}:{5}".format(wav_t_start,
+                                                       wav_t_end,
+                                                       current_noise_name,
+                                                       noise_t_start,
+                                                       noise_t_end,
+                                                       current_snr),
+                      end=",",file=f)
+	    # deal with the bit of wav
+	    # firstly, we randomly choose a kind of noise and snr
+	    noise_index = random.randrange(0, noise_num_utts)
+            current_noise_name = noise_ids[noise_index]
+            current_noise_len = noise_lengths[noise_index]
+	    current_snr = random.randrange(args.max_snr, args.min_snr)
+		
+	    # Secondly, we randomly select a fragement of the noise file.
+            noise_start_bound = float('{:.2f}'.format(current_noise_len - the_rest))
+            noise_t_start = float('{:.2f}'.format(random.uniform(0, noise_start_bound)))
+	    noise_t_end = noise_t_start + the_rest
+	    current_noise_length = noise_t_end - noise_t_start
+		
+	    # Thirdly, we generate the start and end point of wav
+            wav_t_start = wav_t_end #the new start is the end of the last.
+	    wav_t_end = wav_t_start + current_noise_length
+		
+	    # Forthly, print
+            print("{0}:{1}:{2}:{3}:{4}:{5}".format(wav_t_start,
+                                                   wav_t_end,
+                                                   current_noise_name,
+                                                   noise_t_start,
+                                                   noise_t_end,
+                                                   current_snr),
+                  file=f)		
+	    num_variable_done += 1
+	f.close()
+    print('''generate_fixed_length_range.py: finished generate the range files for all wav. Compare with our expect, it lacks %d files. Now we totally have %d noise range files.''' %(num_variable_error, num_variable_done) )
 
+if args.variable_len_additive_noise == "true":
+    GenerateVariableLengthRangeFile()
+else:
+    GenerateFixedLengthRangeFile()
diff --git a/egs/wsj/s5/steps/nnet3/fvector/generate_variable_length_range.py b/egs/wsj/s5/steps/nnet3/fvector/generate_variable_length_range.py
index 67d0fd0d5ad..ef0fe9e02a2 100644
--- a/egs/wsj/s5/steps/nnet3/fvector/generate_variable_length_range.py
+++ b/egs/wsj/s5/steps/nnet3/fvector/generate_variable_length_range.py
@@ -2,10 +2,12 @@
 
 # The function use to generate range file for fvector
 # This is the variable-length version
-# The format is <wav_t_start> <wav_t_end> <noise_uttid> <noise_t_start> <noise_t_end> <snr>
+# Each line of the range-file corresponds to a kind of perturbed wav. In each line,
+# we use commas to separate the different addnoise ranges. The format of each addnoise
+# range is <wav_t_start>:<wav_t_end>:<noise_uttid>:<noise_t_start>:<noise_t_end>:<snr>
 
 # For <wav_t_start> <wav_t_end>
-# We except the last fragement, the length will be random.
+# Except for the last fragment, the length will be random.
 
 # For <noise_uttid>
 # It is randomly selected from noise list, which is longer than --min-additive-noise-len
@@ -16,12 +18,14 @@
 # If the noise file is shorter than T. We select the whole noise.
 
 # For <snr>, it was used to control the amplitude of noise
+# It will be randomly selected from the range (max-snr, min-snr)
 
 from __future__ import print_function
 import re, os, argparse, sys, math, warnings, random
 
-parser = argparse.ArgumentParser(description="Generate N noise range files for each original wav. The file"
-                                 "which created by this python code will be supplied to variable-length "
+parser = argparse.ArgumentParser(description="Generate a noise range-file which contains "
+                                 "N lines corresponding to the number of kinds for each original wav. "
+                                 "The file created by this python code will be supplied to variable-length "
                                  "and additive noise program.",
                                  epilog="Called by steps/nnet3/fvector/add_noise.sh")
 parser.add_argument("--num-kind-range", type=int, default=4,
@@ -36,13 +40,13 @@
                     help="Seed for random number generator")
 
 # now the positional arguments
-parser.add_argument("wav_utt2dur",
-                    help="utt2dur file of the original wav to be used as input (format is: "
+parser.add_argument("wav2dur",
+                    help="wav2dur file of the original wav to be used as input (format is: "
                     "<utterance-id> <duration>")
-parser.add_argument("noise_utt2dur",
-                    help="utt2dur file of the noise wav to be used as input (format is: "
+parser.add_argument("noise2dur",
+                    help="noise2dur file of the noise wav to be used as input (format is: "
                     "<utterance-id> <duration>")
-parser.add_argument("rangs_dir",
+parser.add_argument("range_dir",
                     help="Name of ranges directory, exp/fxvector/ranges")
 
 print(' '.join(sys.argv))
@@ -57,45 +61,45 @@
 random.seed(args.seed)
 
 # deal with the original wav utt2dur
-# the information was stored in wav_utt_ids[], wav_lengths[] and wav_num_utts
-f = open(args.wav_utt2dur, "r")
+# the information was stored in wav_ids[], wav_lengths[] and wav_num_utts
+f = open(args.wav2dur, "r")
 if f is None:
-    sys.exit("Error opening wav_utt2dur file " + str(args.wav_utt2dur))
-wav_utt_ids = []
+    sys.exit("Error opening wav2dur file " + str(args.wav2dur))
+wav_ids = []
 wav_lengths = []
 for line in f:
     a = line.split()
     if len(a) != 2:
-        sys.exit("bad line in wav_utt2dur file " + line)
+        sys.exit("Bad line in wav2dur file " + line)
 	if float(a[1]) < args.min_additive_noise_len:
 	    sys.exit("ERROR: The wav length is shorter than --min-additive-noise-len")
-    wav_utt_ids.append(a[0])
+    wav_ids.append(a[0])
     wav_lengths.append(float(a[1]))
 f.close()
 
-wav_num_utts = len(wav_utt_ids)
+wav_num_utts = len(wav_ids)
 
 # deal with the noise wav utt2dur
 # remove the noise whose length < --min-additive-noise-len
 num_error = 0
 num_done = 0
-f = open(args.noise_utt2dur, "r")
+f = open(args.noise2dur, "r")
 if f is None:
-    sys.exit("Error opening wav_utt2dur file " + str(args.wav_utt2dur))
-noise_utt_ids = []
+    sys.exit("Error opening noise2dur file " + str(args.noise2dur))
+noise_ids = []
 noise_lengths = []
 for line in f:
     a = line.split()
     if len(a) != 2:
-        sys.exit("bad line in noise_utt2dur file " + line);
+        sys.exit("bad line in noise2dur file " + line);
     if float(a[1]) < args.min_additive_noise_len:
         num_error += 1
         continue
-    noise_utt_ids.append(a[0])
+    noise_ids.append(a[0])
     noise_lengths.append(float(a[1]))
     num_done += 1
 f.close()
-noise_num_utts = len(noise_utt_ids)
+noise_num_utts = len(noise_ids)
 noise_str =  "Warning: There are " + str(num_error) + " noise files length smaller than " + \
              "--min-additive-noise-len, we remove it from the noise list. Now, there are " + \
              str(num_done) + " noise file."
@@ -104,6 +108,9 @@
 num_error = 0
 num_done = 0
 # generate the range file for each original wav file
+num_variable_error = 0
+num_variable_done = 0
+
 for i in range(0, wav_num_utts):
 
     # check the noise list has enough sample or not
@@ -112,55 +119,58 @@
     
     if max_num_additive_noise > noise_num_utts:
         print( "Warning: The number of noise files or the --min-additive-noise-len is too small" )
-        num_error += 1
+        num_variable_error += 1
         continue
 
+    # create a file to record the ranges
+    f = open(args.range_dir + "/" + str(wav_ids[i]) + ".noiserange", "w")
+    if f is None:
+        sys.exit("Error open file " + args.range_dir + "/" + str(wav_ids[i]) + ".noiserange")
+       
     # We generate $num_kind_range ranges
     for j in range(0, args.num_kind_range):
+        # print the perturbed wav id in the beginning of line
+        print("{0}-{1}".format(wav_ids[i], "perturbed-"+str(j+1)), end=" ", file=f)
 
-        # create a file to record the ranges
-        f = open(args.rangs_dir + "/" + str(wav_utt_ids[i]) + ".range." + str(j), "w")
-        if f is None:
-            sys.exit("Error open file " + args.rangs_dir + "/" + str(wav_utt_ids[i]) + ".ranges." + str(j))
         # generate range file
-        # format: wav_t_start, wav_t_end, noise_name, noise_t_start, noise_t_end, snr
+        # format: wav_t_start:wav_t_end:noise_name:noise_t_start:noise_t_end:snr,
         the_rest = current_wav_len
         wav_t_start = 0.0
         wav_t_end = 0.0
         while (the_rest > float(args.min_additive_noise_len)):
-	    # firstly, we randomly choose a kind of noise and snr
-	    noise_index = random.randrange(0, noise_num_utts)
-            current_noise_name = noise_utt_ids[noise_index]
+            # firstly, we randomly choose a kind of noise and snr
+            noise_index = random.randrange(0, noise_num_utts)
+            current_noise_name = noise_ids[noise_index]
             current_noise_len = noise_lengths[noise_index]
 	    current_snr = random.randrange(args.max_snr, args.min_snr)
 			
             # Secondly, we randomly select a fragement of the noise file.
             noise_start_bound = float('{:.2f}'.format(current_noise_len - float(args.min_additive_noise_len)))
             noise_t_start = float('{:.2f}'.format(random.uniform(0, noise_start_bound)))
-	    noise_end_upperbound = float('{:.2f}'.format(noise_t_start + float(args.min_additive_noise_len)))
-	    noise_end_lowerbound = float('{:.2f}'.format(min((noise_t_start + the_rest), current_noise_len)))
+            noise_end_upperbound = float('{:.2f}'.format(noise_t_start + float(args.min_additive_noise_len)))
+            noise_end_lowerbound = float('{:.2f}'.format(min((noise_t_start + the_rest), current_noise_len)))
             noise_t_end = float('{:.2f}'.format(random.uniform(noise_end_upperbound, noise_end_lowerbound)))
-	    current_noise_length = noise_t_end - noise_t_start
+            current_noise_length = noise_t_end - noise_t_start
 			
-	    # Thirdly, we generate the start and end point of wav
+            # Thirdly, we generate the start and end point of wav
             wav_t_start = wav_t_end #the new start is the end of the last.
-	    wav_t_end = wav_t_start + current_noise_length
+            wav_t_end = wav_t_start + current_noise_length
 			
-	    # Forthly, update the_rest
-	    the_rest = the_rest - current_noise_length
+            # Forthly, update the_rest
+            the_rest = the_rest - current_noise_length
 	    
             # Fifthly, print
-	    print("{0} {1} {2} {3} {4} {5}".format(wav_t_start,
+            print("{0}:{1}:{2}:{3}:{4}:{5}".format(wav_t_start,
                                                    wav_t_end,
                                                    current_noise_name,
                                                    noise_t_start,
                                                    noise_t_end,
                                                    current_snr),
-                  file=f)
-	# deal with the bit of wav
-	# firstly, we randomly choose a kind of noise and snr
+                  end=",",file=f)
+        # deal with the bit of wav
+        # firstly, we randomly choose a kind of noise and snr
 	noise_index = random.randrange(0, noise_num_utts)
-        current_noise_name = noise_utt_ids[noise_index]
+        current_noise_name = noise_ids[noise_index]
         current_noise_len = noise_lengths[noise_index]
 	current_snr = random.randrange(args.max_snr, args.min_snr)
 		
@@ -169,20 +179,20 @@
         noise_t_start = float('{:.2f}'.format(random.uniform(0, noise_start_bound)))
 	noise_t_end = noise_t_start + the_rest
 	current_noise_length = noise_t_end - noise_t_start
-		
+	
 	# Thirdly, we generate the start and end point of wav
         wav_t_start = wav_t_end #the new start is the end of the last.
 	wav_t_end = wav_t_start + current_noise_length
-		
+	
 	# Forthly, print
-	print("{0} {1} {2} {3} {4} {5}".format(wav_t_start,
+        print("{0}:{1}:{2}:{3}:{4}:{5}".format(wav_t_start,
                                                wav_t_end,
                                                current_noise_name,
                                                noise_t_start,
                                                noise_t_end,
                                                current_snr),
-              file=f)
-        f.close()  			
-	num_done += 1
-		
-print('''generate_fixed_length_range.py: finished generate the range files for all wav. Compare with our expect, it lacks %d files. Now we totally have %d noise range files.''' %(num_error, num_done) )
+              file=f)		
+	num_variable_done += 1
+    f.close()
+print('''generate_variable_length_range.py: finished generate the range files for all wav. Compare with our expect, it lacks %d files. Now we totally have %d noise range files.''' %(num_variable_error, num_variable_done) )
+
diff --git a/src/fvectorbin/nnet3-fvector-perturb-signal.cc b/src/fvectorbin/nnet3-fvector-perturb-signal.cc
index 02e13656b74..09962e96a7b 100644
--- a/src/fvectorbin/nnet3-fvector-perturb-signal.cc
+++ b/src/fvectorbin/nnet3-fvector-perturb-signal.cc
@@ -36,6 +36,78 @@ struct NoiseController{
     noise_t_start_(noise_t_start), noise_t_end_(noise_t_end), snr_(snr) { }
 };
 
+void GenerateController(std::vector<std::string> &segments, 
+                        std::vector<NoiseController> *controller) {
+  // Parses each "<wav_t_start>:<wav_t_end>:<noise_uttid>:<noise_t_start>:
+  // <noise_t_end>:<snr>" segment and appends one NoiseController per segment.
+  // Malformed segments are fatal: a failed numeric conversion would otherwise
+  // silently reuse a stale or uninitialized value.
+  for (size_t i = 0; i < segments.size(); ++i) {
+    std::vector<std::string> split_string;
+    SplitStringToVector(segments[i], ":", true, &split_string);
+    BaseFloat wav_t_start, wav_t_end, noise_t_start, noise_t_end, snr;
+    if (split_string.size() != 6 ||
+        !ConvertStringToReal(split_string[0], &wav_t_start) ||
+        !ConvertStringToReal(split_string[1], &wav_t_end) ||
+        !ConvertStringToReal(split_string[3], &noise_t_start) ||
+        !ConvertStringToReal(split_string[4], &noise_t_end) ||
+        !ConvertStringToReal(split_string[5], &snr)) {
+      KALDI_ERR << "Bad noise-range segment: " << segments[i];
+    }
+    const std::string &noise_uttid = split_string[2];
+    controller->push_back(NoiseController(wav_t_start, wav_t_end, noise_uttid,
+                                          noise_t_start, noise_t_end, snr));
+  }
+}
+
+void ApplyNoise(std::string &noise_scp, const std::vector<NoiseController> &controller,
+                const VectorBase<BaseFloat> &input_wav, VectorBase<BaseFloat> *perturbed_wav) {
+  // Adds, for each NoiseController, a rescaled noise excerpt onto the matching
+  // span of *perturbed_wav; noise_scp is the rspecifier of the noise waves.
+  RandomAccessTableReader<WaveHolder> noise_reader(noise_scp);
+  // NOTE(review): the input sampling rate is not passed in, so the noise rate
+  // converts times to samples for both signals; the caller must make sure the
+  // rates match.  (input_wav.Dim() is a sample count, not a frequency.)
+  for (size_t i = 0; i < controller.size(); ++i) {
+    const WaveData &noise_wav = noise_reader.Value(controller[i].noise_uttid_);
+    const BaseFloat samp_freq = noise_wav.SampFreq();
+    // Copy channel 0 of the noise wave into a vector.
+    const Matrix<BaseFloat> &noise_matrix = noise_wav.Data();
+    int32 num_samp_noise = noise_matrix.NumCols();
+    Vector<BaseFloat> noise(num_samp_noise);
+    noise.CopyRowFromMat(noise_matrix, 0);
+
+    int32 input_start_point = samp_freq * controller[i].wav_t_start_;
+    int32 input_end_point = samp_freq * controller[i].wav_t_end_ - 1;
+    int32 noise_start_point = samp_freq * controller[i].noise_t_start_;
+    int32 noise_end_point = samp_freq * controller[i].noise_t_end_ - 1;
+    BaseFloat snr = controller[i].snr_;
+
+    SubVector<BaseFloat> input_part(input_wav, input_start_point,
+                                    input_end_point - input_start_point + 1);
+    SubVector<BaseFloat> noise_part(noise, noise_start_point,
+                                    noise_end_point - noise_start_point + 1);
+    Vector<BaseFloat> selected_noise(input_part.Dim());
+    if (noise_part.Dim() < input_part.Dim()) {
+      int32 the_rest = selected_noise.Dim();  // tile the noise to fill the span
+      while (the_rest > noise_part.Dim()) {
+        selected_noise.Range(selected_noise.Dim()-the_rest,
+                             noise_part.Dim()).CopyFromVec(noise_part);
+        the_rest = the_rest - noise_part.Dim();
+      }
+      selected_noise.Range(selected_noise.Dim()-the_rest, the_rest).CopyFromVec(
+          noise_part.Range(0, the_rest));
+    } else {  // CopyFromVec needs equal dims, so drop any surplus noise samples
+      selected_noise.CopyFromVec(noise_part.Range(0, selected_noise.Dim()));
+    }
+    // snr is in dB of power: scaled noise energy = input energy / 10^(snr/10).
+    BaseFloat input_energy = VecVec(input_part, input_part);
+    BaseFloat noise_energy = VecVec(selected_noise, selected_noise);
+    if (noise_energy <= 0.0) continue;  // all-zero noise: nothing to add
+    BaseFloat scale_factor = sqrt(input_energy / noise_energy / pow(10, snr / 10));
+    perturbed_wav->Range(input_start_point, input_part.Dim()).AddVec(scale_factor, selected_noise);
+  }
+}
 
 }
 
@@ -44,27 +116,27 @@ int main(int argc, char *argv[]) {
     using namespace kaldi;
 
     const char *usage =
-        "Perturb the wave files supplied via the specified noise-range file\n"
+        "Perturb the wave files supplied via the specified noise-range\n"
         "Usage:  nnet3-fvector-perturb-signal [options...] <wav-in-rxfilename> "
         "<wav-out-wxfilename>\n"
         "e.g.\n"
-        "nnet3-fvector-perturb-signal --noise-range-file=uttid.range.n --add-noise-list="
-        "scp:noise.scp --input-channel=0 input.wav output.wav\n";
+        "nnet3-fvector-perturb-signal --noise=scp:noise.scp --noise-range="
+        "\"$(head -n 5 a.noiserange | tail -n 1)\" --input-channel=0 input.wav "
+        "perturbed_input.wav\n";
 
     ParseOptions po(usage);
     
-    std::string noise_range_file;
-    std::string noise_list_rspecifier;
+    std::string noise;
+    std::string noise_range;
     int32 input_channel = 0;
 
-    po.Register("noise-range-file",&noise_range_file,
+    po.Register("noise",&noise,
+                "There is a list of optional noise. It need to match the --noise-range.");
+    po.Register("noise-range",&noise_range,
                 "Provide a range file. We use the content in this file to control "
-                "the process of adding noise. The format of each line in this file "
-                ":<wav_t_start> <wav_t_end> <noise_utt_id> <noise_t_start> "
-                "<noise_t_end> <snr>");
-    po.Register("add-noise-list",&noise_list_rspecifier,
-                "There is a list of optional noise. It need to match the "
-                "--noise-range-file.");
+                "the process of adding noise. For each line, the format is <utt_id-perturb-i> "
+                "<wav_t_start_1>:<wav_t_end_1>:<noise_utt_id_1>:<noise_t_start_1>:<noise_t_end_1>:<snr_1>,...,"
+                "<wav_t_start_N>:<wav_t_end_N>:<noise_utt_id_N>:<noise_t_start_N>:<noise_t_end_N>:<snr_N>");
     po.Register("input-channel",&input_channel,
                 "Specifies the channel to be used in input file");
     
@@ -77,23 +149,15 @@ int main(int argc, char *argv[]) {
     std::string input_wave_file = po.GetArg(1);
     std::string output_wave_file = po.GetArg(2);
 
-    // Genterate the Noise Controller list
+    // Generate the Noise Controller list
     std::vector<NoiseController> controller;
-    if (noise_range_file != "") {
-      std::ifstream fi(noise_range_file.c_str());
-      if (!fi) {
-        KALDI_ERR << "failed to open file " << noise_range_file;
-      }
-      BaseFloat wav_t_start;
-      BaseFloat wav_t_end;
-      std::string noise_uttid;
-      BaseFloat noise_t_start;
-      BaseFloat noise_t_end;
-      BaseFloat snr;
-      while (fi >> wav_t_start >> wav_t_end >> noise_uttid >> noise_t_start >> noise_t_end >> snr) {
-        controller.push_back(NoiseController(wav_t_start, wav_t_end, noise_uttid,
-                                            noise_t_start, noise_t_end, snr));
-      }
+    if (noise_range != "") {
+      int index = noise_range.find_first_of(" ");
+      std::string perturbed_utt_id = noise_range.substr(0, index);
+      std::string noise_range_content = noise_range.substr(index+1);
+      std::vector<std::string> segments;
+      SplitStringToVector(noise_range_content, ",", true, &segments);
+      GenerateController(segments, &controller);
     }
 
     WaveData input_wave;
@@ -116,52 +180,9 @@ int main(int argc, char *argv[]) {
     Vector<BaseFloat> input(num_samp_input);
     input.CopyRowFromMat(input_matrix, input_channel);
 
-    // new output vector
+    // new output vector and add noise
     Vector<BaseFloat> output(input);
-
-    // about noise list
-    RandomAccessTableReader<WaveHolder> noise_reader(noise_list_rspecifier);
-
-    // add noise
-    for (int i=0; i < controller.size(); ++i) {
-      const WaveData &noise_wav = noise_reader.Value(controller[i].noise_uttid_);
-      BaseFloat samp_freq_noise = noise_wav.SampFreq();
-      KALDI_ASSERT(samp_freq_input == samp_freq_noise);
-      
-      const Matrix<BaseFloat> &noise_matrix = noise_wav.Data();
-      int32 num_samp_noise = noise_matrix.NumCols();
-      Vector<BaseFloat> noise(num_samp_noise);
-      noise.CopyRowFromMat(noise_matrix, 0);
-
-      int32 input_start_point = samp_freq_input * controller[i].wav_t_start_;
-      int32 input_end_point = samp_freq_input * controller[i].wav_t_end_ - 1;
-      int32 noise_start_point = samp_freq_noise * controller[i].noise_t_start_;
-      int32 noise_end_point = samp_freq_noise * controller[i].noise_t_end_ - 1;
-      BaseFloat snr = controller[i].snr_;
-
-      SubVector<BaseFloat> input_part(input, input_start_point,
-                                      input_end_point - input_start_point + 1);
-      SubVector<BaseFloat> noise_part(noise, noise_start_point,
-                                      noise_end_point - noise_start_point + 1);
-      Vector<BaseFloat> selected_noise(input_part.Dim());
-      if (noise_part.Dim() < input_part.Dim()) {
-        int32 the_rest = selected_noise.Dim();
-        while (the_rest > noise_part.Dim()) {
-          selected_noise.Range(selected_noise.Dim()-the_rest,
-                                    noise_part.Dim()).CopyFromVec(noise_part);
-          the_rest = the_rest - noise_part.Dim();
-        }
-        selected_noise.Range(selected_noise.Dim()-the_rest, the_rest).CopyFromVec(
-            noise_part.Range(0, the_rest));
-      } else {
-        selected_noise.CopyFromVec(noise_part);
-      }
-      
-      BaseFloat input_energy = VecVec(input_part, input_part);
-      BaseFloat noise_energy = VecVec(selected_noise, selected_noise);
-      BaseFloat scale_factor = sqrt(input_energy/ noise_energy/ (pow(10, snr/20)) );
-      output.Range(input_start_point, input_part.Dim()).AddVec(scale_factor, selected_noise);
-    }
+    ApplyNoise(noise, controller, input, &output);
 
     Matrix<BaseFloat> out_matrix(1, num_samp_input);
     out_matrix.CopyRowsFromVec(output);

From 8574d3e0625c07b389e67c890781432fc1a9c29f Mon Sep 17 00:00:00 2001
From: Hang Lyu <hlyu@a11.clsp.jhu.edu>
Date: Fri, 30 Dec 2016 17:48:41 -0500
Subject: [PATCH 13/23] modify generate_noise_range.py, add_noise.sh and
 nnet3-fvector-perturb-signal.cc

---
 egs/wsj/s5/steps/nnet3/fvector/add_noise.sh   |  25 +--
 .../nnet3/fvector/generate_noise_range.py     | 166 +++++++++---------
 .../nnet3-fvector-perturb-signal.cc           |  71 ++++----
 3 files changed, 139 insertions(+), 123 deletions(-)

diff --git a/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh b/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
index 4a7169a3b42..03dd451d064 100644
--- a/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
+++ b/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
@@ -2,18 +2,23 @@
 # Copyright 2016  Johns Hopkins University (Author: Daniel Povey)
 # Apache 2.0
 
+# The script is used to generate the egs which will be used in fvector framework.
+# So far, the script achieves the duration files of train dataset and noise
+# dataset seperately. Then, with the duration files, it will generate the range
+# file which is used to control the process about adding additive noise. 
+
 # Begin Configuration section.
 stage=0
 cmd=run.pl
 nj=4
 # Begain Configuration.
-min_additive_noise_len=2.0       # the minimum duration of each noise file
-num_kind_range=4                 # the number of kinds of noise ranges
-min_snr=-5                       # the minimum snr value
-max_snr=-15                      # the maximum snr value
-seed=-1                          # set the random seed
-variable_len_additive_noise=true #If true, generate the variable-length range files
-                                 #If false, generate the fixed-length range files
+min_additive_noise_len=2.0       # the minimum duration of each noise file in seconds.
+num_ranges_per_wav=4             # the number of noise ranges for each wav.
+min_snr=-5                       # the minimum snr value in dB.
+max_snr=-15                      # the maximum snr value in dB.
+seed=-1                          # set the random seed.
+variable_len_additive_noise=true #If true, generate the variable-length range files.
+                                 #If false, generate the fixed-length range files.
 # End Configuration options.
 
 echo "$0 $@" # Print the command line for logging
@@ -26,7 +31,7 @@ if [ $# != 3 ]; then
   echo "e.g.:  steps/nnet3/fvector/add_noise.sh data/train data/noise ranges"
   echo "main options (for others, see top of script file)"
   echo "  --min-additive-noise-len <second>                # limit the minimum length of noise" 
-  echo "  --num-kind-range <n>                             # number of noise range kinds"
+  echo "  --num-ranges-per-wav <n>                         # number of noise range kinds"
   echo "  --variable-len-additive-noise (true|false)       # decide fixed/variable version"
   echo "  --nj <nj>                                        # number of parallel jobs"
   echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs"
@@ -52,13 +57,13 @@ if [ $stage -le 0 ]; then
   echo "$0: generate $num_kind_rage kinds of noise range for each original wav"
   $cmd $dir/log/generate_noise_range.log \
     steps/nnet3/fvector/generate_noise_range.py \
-      --num-kind-range=$num_kind_range \
+      --num-ranges-per-wav=$num_ranges_per_wav \
       --min-additive-noise-len=$min_additive_noise_len \
       --min-snr=$min_snr \
       --max-snr=$max_snr \
       --variable-len-additive-noise $variable_len_additive_noise \
       --seed=$seed \
-      $data/utt2dur $noise/utt2dur $dir
+      $data/utt2dur $noise/utt2dur $dir/ranges
 fi
 
 exit 0
diff --git a/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py b/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py
index fa42e030e85..f55af9e33e9 100644
--- a/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py
+++ b/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py
@@ -1,12 +1,14 @@
 #!/usr/bin/env python
 
 # The function use to generate range-file for fvector
-# Each line of the range-file corrsponds to a kind of perturbed wav. In each line,
+# Each line of the range-file corresponds to a kind of perturbed wav. In each
+# line, there is a <perturb-wav-id> in the beginning of the line and then
 # we use comma to seperate different addnoise range. The format of each addnoise
 # range is <wav_t_start>:<wav_t_end>:<noise_uttid>:<noise_t_start>:<noise_t_end>:<snr>
 # The line which starts with the asterisk(*) is the differences between two versions.
 
 # For the fixed-length version:
+# In the beginning of the line, there is a <perturb-wav-id>
 # *For <wav_t_start> <wav_t_end>
 # *Except the last fragement, the length will be a fixed value T.
 # For <noise_uttid>
@@ -19,6 +21,7 @@
 # The snr will be randomly selected form the range (max-snr, min-snr).
 
 # For the variable-length version:
+# In the beginning of the line, there is a <perturb-wav-id>
 # *For <wav_t_start> <wav_t_end>
 # *Except the last fragement, the length will be random.
 # For <noise_uttid>
@@ -39,10 +42,10 @@
                                  "The file which created by this python code will be supplied to "
                                  "add additive noise program.",
                                  epilog="Called by steps/nnet3/fvector/add_noise.sh")
-parser.add_argument("--num-kind-range", type=int, default=4,
+parser.add_argument("--num-ranges-per-wav", type=int, default=4,
                     help="the number of expected addnoise kinds")
 parser.add_argument("--min-additive-noise-len", type=float, default=2.0,
-                    help="the minimum duration/length of each noise file")
+                    help="the minimum duration/length of each noise file in seconds")
 parser.add_argument("--min-snr", type=int, default=-5,
                     help="the minimum Signal-to-Noise Rate, the default=0")
 parser.add_argument("--max-snr", type=int, default=-15,
@@ -61,8 +64,8 @@
 parser.add_argument("noise2dur",
                     help="noise2dur file of the noise wav to be used as input (format is: "
                     "<utterance-id> <duration>")
-parser.add_argument("range_dir",
-                    help="Name of ranges directory, exp/fxvector/ranges")
+parser.add_argument("range_file",
+                    help="Name of range file, e.g.: exp/fxvector/ranges")
 
 print(' '.join(sys.argv))
 
@@ -75,73 +78,58 @@
 
 random.seed(args.seed)
 
-# deal with the original wav utt2dur
-# the information was stored in wav_ids[], wav_lengths[] and wav_num_utts
-f = open(args.wav2dur, "r")
-if f is None:
-    sys.exit("Error opening wav2dur file " + str(args.wav2dur))
-wav_ids = []
-wav_lengths = []
-for line in f:
-    a = line.split()
-    if len(a) != 2:
-        sys.exit("Bad line in wav2dur file " + line)
-    if float(a[1]) < args.min_additive_noise_len:
-	sys.exit("ERROR: The wav length is shorter than --min-additive-noise-len")
-    wav_ids.append(a[0])
-    wav_lengths.append(float(a[1]))
-f.close()
-
-wav_num_utts = len(wav_ids)
-
-# deal with the noise wav utt2dur
-# remove the noise whose length < --min-additive-noise-len
-num_error = 0
-num_done = 0
-f = open(args.noise2dur, "r")
-if f is None:
-    sys.exit("Error opening noise2dur file " + str(args.noise2dur))
-noise_ids = []
-noise_lengths = []
-for line in f:
-    a = line.split()
-    if len(a) != 2:
-        sys.exit("Bad line in noise2dur file " + line);
-    if float(a[1]) < args.min_additive_noise_len:
-        num_error += 1
-        continue
-    noise_ids.append(a[0])
-    noise_lengths.append(float(a[1]))
-    num_done += 1
-f.close()
-noise_num_utts = len(noise_ids)
-noise_str =  "Warning: There are " + str(num_error) + " noise files length smaller than " + \
-             "--min-additive-noise-len, we remove it from the noise list. Now, there are " + \
-             str(num_done) + " noise file."
-sys.stdout.write( noise_str + '\n')
+# Parse duration_file (one "<utt-id> <duration>" pair per line), appending the
+# ids to ids[] and the durations to lengths[]. An entry shorter than
+# --min-additive-noise-len is fatal if strict is true; otherwise it is skipped.
+def WavToDuration(duration_file, ids, lengths, strict):
+    try:
+        f = open(duration_file, "r")
+    except IOError:
+        sys.exit("Error opening wav2dur file " + str(duration_file))
+    num_error = 0
+    num_done = 0
+    for line in f:
+        a = line.split()
+        if len(a) != 2:
+            sys.exit("Bad line \"" + line.strip() +"\" in file: " + str(duration_file))
+        if float(a[1]) < args.min_additive_noise_len:
+            if strict:
+                sys.exit("ERROR: The wav length \"" + line.strip()+ "\" is shorter than --min-additive-noise-len")
+            else:
+                num_error += 1
+                continue
+        ids.append(a[0])
+        lengths.append(float(a[1]))
+        num_done += 1
+    f.close()
+    if num_error != 0:
+        sys.stdout.write("Warning: There are " + str(num_error) + " utterances whose length smaller than " +
+                         "--min-additive-noise-len, we remove it from the list. Now, there are " +
+                         str(num_done) + " utterances in the list." + '\n')
 
 # This function generates the fixed-length range files
 def GenerateFixedLengthRangeFile():
     num_fixed_error = 0
     num_fixed_done = 0
+    num_wav = len(wav_ids)
+    num_noise = len(noise_ids)
+    # create a file to record the ranges
+    f = open(args.range_file, "w")
+    if f is None:
+        sys.exit("Error open file " + args.range_file)
 
-    for i in range(0, wav_num_utts):
+    for i in range(0, num_wav):
         # decide the number of noises which will be add to 
         current_wav_len = wav_lengths[i]
         max_num_additive_noise = int(current_wav_len / args.min_additive_noise_len)
     
-        if max_num_additive_noise > noise_num_utts:
+        if max_num_additive_noise > num_noise:
             print( "Warning: The number of noise files or the --min-additive-noise-len is too small" )
             num_fixed_error += 1
             continue
-
-        # create a file to record the ranges
-        f = open(args.range_dir + "/" + str(wav_ids[i]) + ".noiserange", "w")
-        if f is None:
-            sys.exit("Error open file " + args.range_dir + "/" + str(wav_ids[i]) + ".noiserange")
-
-        # We generate $num_kind_range ranges
-        for j in range(0, args.num_kind_range):
+        
+        # We generate $num_ranges_per_wav ranges
+        for j in range(0, args.num_ranges_per_wav):
             # print the perturbed wav id in the beginning of line
             print("{0}-{1}".format(wav_ids[i], "perturbed-"+str(j+1)), end=" ", file=f)
             
@@ -157,7 +145,7 @@ def GenerateFixedLengthRangeFile():
                 wav_t_start = float('{:.2f}'.format(k * additive_noise_len))
                 wav_t_end = wav_t_start + additive_noise_len
 			
-                noise_index = random.randrange(0, noise_num_utts)
+                noise_index = random.randrange(0, num_noise)
                 current_noise_name = noise_ids[noise_index]
                 current_noise_len = noise_lengths[noise_index]
                 if current_noise_len <= additive_noise_len:
@@ -182,7 +170,7 @@ def GenerateFixedLengthRangeFile():
 	    wav_t_start = float('{:.2f}'.format(k * additive_noise_len))
             wav_t_end = current_wav_len
 
-	    noise_index = random.randrange(0, noise_num_utts)
+	    noise_index = random.randrange(0, num_noise)
             current_noise_name = noise_ids[noise_index]
             current_noise_len = noise_lengths[noise_index]
 
@@ -204,32 +192,32 @@ def GenerateFixedLengthRangeFile():
                                                    current_snr),
                   file=f)
 	    num_fixed_done += 1
-        f.close()
-    print('''generate_fixed_length_range.py: finished generate the range files for all wav. Compare with our expect, it lacks %d files. Now we totally have %d noise range files.''' %(num_fixed_error, num_fixed_done) )
+    f.close()
+    print('''Finished generating fixed_length range-file for all wav. Compare with our expect, it lacks %d ranges. Now we totally have %d noise ranges in the range-file.''' %(num_fixed_error, num_fixed_done) )
 
 # This function generates the variable-length range files
 def GenerateVariableLengthRangeFile():
     num_variable_error = 0
     num_variable_done = 0
 
-    for i in range(0, wav_num_utts):
+    # create a file to record the ranges
+    f = open(args.range_file, "w")
+    if f is None:
+        sys.exit("Error open file " + args.range_file)
+
+    for i in range(0, num_wav):
 
         # check the noise list has enough sample or not
         current_wav_len = wav_lengths[i]
         max_num_additive_noise = int(current_wav_len / args.min_additive_noise_len)
     
-        if max_num_additive_noise > noise_num_utts:
+        if max_num_additive_noise > num_noise:
             print( "Warning: The number of noise files or the --min-additive-noise-len is too small" )
             num_variable_error += 1
             continue
-
-        # create a file to record the ranges
-        f = open(args.range_dir + "/" + str(wav_ids[i]) + ".noiserange", "w")
-        if f is None:
-            sys.exit("Error open file " + args.range_dir + "/" + str(wav_ids[i]) + ".noiserange")
-        
-        # We generate $num_kind_range ranges
-        for j in range(0, args.num_kind_range):
+       
+        # We generate $num_ranges_per_wav ranges
+        for j in range(0, args.num_ranges_per_wav):
             # print the perturbed wav id in the beginning of line
             print("{0}-{1}".format(wav_ids[i], "perturbed-"+str(j+1)), end=" ", file=f)
 
@@ -240,7 +228,7 @@ def GenerateVariableLengthRangeFile():
             wav_t_end = 0.0
             while (the_rest > float(args.min_additive_noise_len)):
 	        # firstly, we randomly choose a kind of noise and snr
-	        noise_index = random.randrange(0, noise_num_utts)
+	        noise_index = random.randrange(0, num_noise)
                 current_noise_name = noise_ids[noise_index]
                 current_noise_len = noise_lengths[noise_index]
 	        current_snr = random.randrange(args.max_snr, args.min_snr)
@@ -270,7 +258,7 @@ def GenerateVariableLengthRangeFile():
                       end=",",file=f)
 	    # deal with the bit of wav
 	    # firstly, we randomly choose a kind of noise and snr
-	    noise_index = random.randrange(0, noise_num_utts)
+	    noise_index = random.randrange(0, num_noise)
             current_noise_name = noise_ids[noise_index]
             current_noise_len = noise_lengths[noise_index]
 	    current_snr = random.randrange(args.max_snr, args.min_snr)
@@ -294,10 +282,26 @@ def GenerateVariableLengthRangeFile():
                                                    current_snr),
                   file=f)		
 	    num_variable_done += 1
-	f.close()
-    print('''generate_fixed_length_range.py: finished generate the range files for all wav. Compare with our expect, it lacks %d files. Now we totally have %d noise range files.''' %(num_variable_error, num_variable_done) )
+    f.close()
+    print('''Finished generating variable_length range-file for all wav. Compare with our expect, it lacks %d ranges. Now we totally have %d noise ranges in the range-file.''' %(num_variable_error, num_variable_done) )
+
+if __name__ == "__main__":
+    # deal with the original wav utt2dur
+    # the information was stored in wav_ids[], wav_lengths[] and num_wav
+    wav_ids = []
+    wav_lengths = []
+    WavToDuration(args.wav2dur, wav_ids, wav_lengths, True)
+    num_wav = len(wav_ids)
+
+    # deal with the noise wav utt2dur
+    # remove the noise whose length < --min-additive-noise-len
+    noise_ids = []
+    noise_lengths = []
+    WavToDuration(args.noise2dur, noise_ids, noise_lengths, False)
+    num_noise = len(noise_ids)
 
-if args.variable_len_additive_noise == "true":
-    GenerateVariableLengthRangeFile()
-else:
-    GenerateFixedLengthRangeFile()
+    # generate the range file
+    if args.variable_len_additive_noise == "true":
+        GenerateVariableLengthRangeFile()
+    else:
+        GenerateFixedLengthRangeFile()
diff --git a/src/fvectorbin/nnet3-fvector-perturb-signal.cc b/src/fvectorbin/nnet3-fvector-perturb-signal.cc
index 09962e96a7b..52992c173c0 100644
--- a/src/fvectorbin/nnet3-fvector-perturb-signal.cc
+++ b/src/fvectorbin/nnet3-fvector-perturb-signal.cc
@@ -22,22 +22,22 @@
 
 namespace kaldi {
 
-struct NoiseController{
-  BaseFloat wav_t_start_;
-  BaseFloat wav_t_end_;
-  std::string noise_uttid_;
-  BaseFloat noise_t_start_;
-  BaseFloat noise_t_end_;
-  BaseFloat snr_;
-
-  NoiseController(BaseFloat wav_t_start, BaseFloat wav_t_end, std::string noise_uttid,
-                  BaseFloat noise_t_start, BaseFloat noise_t_end, BaseFloat snr):
-    wav_t_start_(wav_t_start), wav_t_end_(wav_t_end), noise_uttid_(noise_uttid),
-    noise_t_start_(noise_t_start), noise_t_end_(noise_t_end), snr_(snr) { }
+struct AdditiveNoiseRange{
+  BaseFloat wav_t_start;
+  BaseFloat wav_t_end;
+  std::string noise_uttid;
+  BaseFloat noise_t_start;
+  BaseFloat noise_t_end;
+  BaseFloat snr;
+
+  AdditiveNoiseRange(BaseFloat wav_t_start, BaseFloat wav_t_end, std::string noise_uttid,
+                     BaseFloat noise_t_start, BaseFloat noise_t_end, BaseFloat snr):
+    wav_t_start(wav_t_start), wav_t_end(wav_t_end), noise_uttid(noise_uttid),
+    noise_t_start(noise_t_start), noise_t_end(noise_t_end), snr(snr) { }
 };
 
 void GenerateController(std::vector<std::string> &segments, 
-                        std::vector<NoiseController> *controller) {
+                        std::vector<AdditiveNoiseRange> *controller) {
   BaseFloat wav_t_start;
   BaseFloat wav_t_end;
   std::string noise_uttid;
@@ -55,12 +55,12 @@ void GenerateController(std::vector<std::string> &segments,
     ConvertStringToReal(split_string[4], &noise_t_end);
     ConvertStringToReal(split_string[5], &snr);
   
-    controller->push_back(NoiseController(wav_t_start, wav_t_end, noise_uttid,
-                                            noise_t_start, noise_t_end, snr));
+    controller->push_back(AdditiveNoiseRange(wav_t_start, wav_t_end, noise_uttid,
+                                             noise_t_start, noise_t_end, snr));
   }
 }
 
-void ApplyNoise(std::string &noise_scp, const std::vector<NoiseController> &controller,
+void ApplyNoise(std::string &noise_scp, const std::vector<AdditiveNoiseRange> &controller,
                 const VectorBase<BaseFloat> &input_wav, VectorBase<BaseFloat> *perturbed_wav) {
   // about noise list
   RandomAccessTableReader<WaveHolder> noise_reader(noise_scp);
@@ -69,7 +69,7 @@ void ApplyNoise(std::string &noise_scp, const std::vector<NoiseController> &cont
   // add noise
 
   for (int i=0; i < controller.size(); ++i) {
-    const WaveData &noise_wav = noise_reader.Value(controller[i].noise_uttid_);
+    const WaveData &noise_wav = noise_reader.Value(controller[i].noise_uttid);
     BaseFloat samp_freq_noise = noise_wav.SampFreq();
     KALDI_ASSERT(samp_freq_input == samp_freq_noise);
       
@@ -78,26 +78,33 @@ void ApplyNoise(std::string &noise_scp, const std::vector<NoiseController> &cont
     Vector<BaseFloat> noise(num_samp_noise);
     noise.CopyRowFromMat(noise_matrix, 0);
 
-    int32 input_start_point = samp_freq_input * controller[i].wav_t_start_;
-    int32 input_end_point = samp_freq_input * controller[i].wav_t_end_ - 1;
-    int32 noise_start_point = samp_freq_noise * controller[i].noise_t_start_;
-    int32 noise_end_point = samp_freq_noise * controller[i].noise_t_end_ - 1;
-    BaseFloat snr = controller[i].snr_;
+    int32 input_start_point = samp_freq_input * controller[i].wav_t_start;
+    int32 input_end_point = samp_freq_input * controller[i].wav_t_end - 1;
+    int32 noise_start_point = samp_freq_noise * controller[i].noise_t_start;
+    int32 noise_end_point = samp_freq_noise * controller[i].noise_t_end - 1;
+    BaseFloat snr = controller[i].snr;
 
+    // The input vector and noise vector each contain the whole content of their utterance.
+    // According to the AdditiveNoiseRange, we add the additive noise to the input range by range.
+    // To avoid copying, we use SubVector, which refers to a range of the original data.
     SubVector<BaseFloat> input_part(input_wav, input_start_point,
                                     input_end_point - input_start_point + 1);
     SubVector<BaseFloat> noise_part(noise, noise_start_point,
                                     noise_end_point - noise_start_point + 1);
     Vector<BaseFloat> selected_noise(input_part.Dim());
+
+    // When noise_part is shorter than input_part, we repeat noise_part as many
+    // times as needed until selected_noise is as long as input_part.
+    // Otherwise, selected_noise = noise_part
     if (noise_part.Dim() < input_part.Dim()) {
-      int32 the_rest = selected_noise.Dim();
-      while (the_rest > noise_part.Dim()) {
-        selected_noise.Range(selected_noise.Dim()-the_rest,
+      int32 the_rest_length = selected_noise.Dim();
+      while (the_rest_length > noise_part.Dim()) {
+        selected_noise.Range(selected_noise.Dim()-the_rest_length,
                              noise_part.Dim()).CopyFromVec(noise_part);
-        the_rest = the_rest - noise_part.Dim();
+        the_rest_length = the_rest_length - noise_part.Dim();
       }
-      selected_noise.Range(selected_noise.Dim()-the_rest, the_rest).CopyFromVec(
-          noise_part.Range(0, the_rest));
+      selected_noise.Range(selected_noise.Dim()-the_rest_length, the_rest_length).CopyFromVec(
+          noise_part.Range(0, the_rest_length));
     } else {
       selected_noise.CopyFromVec(noise_part);
     }
@@ -121,8 +128,8 @@ int main(int argc, char *argv[]) {
         "<wav-out-wxfilename>\n"
         "e.g.\n"
         "nnet3-fvector-perturb-signal --noise=scp:noise.scp --noise-range="
-        "\"head -n 5 a.noiserange | tail -n 1\" --input-channel=0 input.wav "
-        "perturbed_input.wav\n";
+        "wav1-perturbed-1 0.0:1.0:noise1:3.5:4.5:-8,... --input-channel=0 "
+        "input.wav perturbed_input.wav\n";
 
     ParseOptions po(usage);
     
@@ -150,8 +157,8 @@ int main(int argc, char *argv[]) {
     std::string output_wave_file = po.GetArg(2);
 
     // Generate the Noise Controller list
-    std::vector<NoiseController> controller;
-    if (noise_range != "") {
+    std::vector<AdditiveNoiseRange> controller;
+    if (!noise_range.empty()) {
       int index = noise_range.find_first_of(" ");
       std::string perturbed_utt_id = noise_range.substr(0, index);
       std::string noise_range_content = noise_range.substr(index+1);

From c6ec39e327dda63122c85dc98cfee13e4f91b0ef Mon Sep 17 00:00:00 2001
From: Hang Lyu <hlyu@a11.clsp.jhu.edu>
Date: Sat, 31 Dec 2016 01:16:56 -0500
Subject: [PATCH 14/23] finished the two steps

---
 egs/wsj/s5/steps/nnet3/fvector/add_noise.sh   |  3 +-
 .../nnet3/fvector/generate_noise_range.py     | 38 ++++++-
 .../fvector/generate_perturb_wav_specifier.py | 99 +++++++++++++++++++
 3 files changed, 136 insertions(+), 4 deletions(-)
 create mode 100644 egs/wsj/s5/steps/nnet3/fvector/generate_perturb_wav_specifier.py

diff --git a/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh b/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
index 03dd451d064..efd648bcbe0 100644
--- a/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
+++ b/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
@@ -6,6 +6,7 @@
 # So far, the script achieves the duration files of train dataset and noise
 # dataset seperately. Then, with the duration files, it will generate the range
 # file which is used to control the process about adding additive noise. 
+# At the same time, it will generate the mapping between wav and perturbedwav.
 
 # Begin Configuration section.
 stage=0
@@ -63,7 +64,7 @@ if [ $stage -le 0 ]; then
       --max-snr=$max_snr \
       --variable-len-additive-noise $variable_len_additive_noise \
       --seed=$seed \
-      $data/utt2dur $noise/utt2dur $dir/ranges
+      $data/utt2dur $noise/utt2dur $dir/ranges $dir/wav2perturbedwav
 fi
 
 exit 0
diff --git a/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py b/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py
index f55af9e33e9..714f187b896 100644
--- a/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py
+++ b/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py
@@ -33,7 +33,8 @@
 # For <snr>, it was used to control the amplitude of noise
 # It will be randomly selected from the range (max-snr, min-snr)
 
-
+# At the same time, the function will generate the mapping of wav and perturbedwav
+# Each line contains a mapping. (e.g.: wav1 wav1-perturbed-1 wav1-perturbed-2 ...)
 from __future__ import print_function
 import re, os, argparse, sys, math, warnings, random
 
@@ -66,6 +67,9 @@
                     "<utterance-id> <duration>")
 parser.add_argument("range_file",
                     help="Name of range file, e.g.: exp/fxvector/ranges")
+parser.add_argument("wav2perturbedwav",
+                    help="This file is used to store the mapping between wav and perturbedwav"
+                    "(e.g.: wav1 wav1-perturbed-1 wav1-perturbed-2 ...")
 
 print(' '.join(sys.argv))
 
@@ -113,10 +117,16 @@ def GenerateFixedLengthRangeFile():
     num_fixed_done = 0
     num_wav = len(wav_ids)
     num_noise = len(noise_ids)
+
     # create a file to record the ranges
     f = open(args.range_file, "w")
     if f is None:
         sys.exit("Error open file " + args.range_file)
+    
+    # create a file to record the wav2perturbedwav
+    g = open(args.wav2perturbedwav, "w")
+    if g is None:
+        sys.exit("Error open file " + args.wav2perturbedwav)
 
     for i in range(0, num_wav):
         # decide the number of noises which will be add to 
@@ -127,12 +137,18 @@ def GenerateFixedLengthRangeFile():
             print( "Warning: The number of noise files or the --min-additive-noise-len is too small" )
             num_fixed_error += 1
             continue
-        
+
+        # print the wav_id
+        print("{0}".format(wav_ids[i]), end="", file=g)
+
         # We generate $num_ranges_per_wav ranges
         for j in range(0, args.num_ranges_per_wav):
             # print the perturbed wav id in the beginning of line
             print("{0}-{1}".format(wav_ids[i], "perturbed-"+str(j+1)), end=" ", file=f)
-            
+
+            # print the perturbedwav_id
+            print(" {0}-{1}".format(wav_ids[i], "perturbed-"+str(j+1)), end="", file=g)
+
             # select a number from [1 ... max_num_additive_noise]
             num_additive_noise = random.randint(1, max_num_additive_noise)
     
@@ -192,7 +208,10 @@ def GenerateFixedLengthRangeFile():
                                                    current_snr),
                   file=f)
 	    num_fixed_done += 1
+        # print the "\n"
+        print("\n", end="", file=g)
     f.close()
+    g.close()
     print('''Finished generating fixed_length range-file for all wav. Compare with our expect, it lacks %d ranges. Now we totally have %d noise ranges in the range-file.''' %(num_fixed_error, num_fixed_done) )
 
 # This function generates the variable-length range files
@@ -205,6 +224,11 @@ def GenerateVariableLengthRangeFile():
     if f is None:
         sys.exit("Error open file " + args.range_file)
 
+    # create a file to record the wav2perturbedwav
+    g = open(args.wav2perturbedwav, "w")
+    if g is None:
+        sys.exit("Error open file " + args.wav2perturbedwav)
+    
     for i in range(0, num_wav):
 
         # check the noise list has enough sample or not
@@ -216,10 +240,16 @@ def GenerateVariableLengthRangeFile():
             num_variable_error += 1
             continue
        
+        # print the wav_id
+        print("{0}".format(wav_ids[i]), end="", file=g)
+        
         # We generate $num_ranges_per_wav ranges
         for j in range(0, args.num_ranges_per_wav):
             # print the perturbed wav id in the beginning of line
             print("{0}-{1}".format(wav_ids[i], "perturbed-"+str(j+1)), end=" ", file=f)
+            
+            # print the perturbedwav_id
+            print(" {0}-{1}".format(wav_ids[i], "perturbed-"+str(j+1)), end="", file=g)
 
             # generate range file
             # format: wav_t_start:wav_t_end:noise_name:noise_t_start:noise_t_end:snr,
@@ -282,7 +312,9 @@ def GenerateVariableLengthRangeFile():
                                                    current_snr),
                   file=f)		
 	    num_variable_done += 1
+        print("\n", end="", file=g)
     f.close()
+    g.close()
     print('''Finished generating variable_length range-file for all wav. Compare with our expect, it lacks %d ranges. Now we totally have %d noise ranges in the range-file.''' %(num_variable_error, num_variable_done) )
 
 if __name__ == "__main__":
diff --git a/egs/wsj/s5/steps/nnet3/fvector/generate_perturb_wav_specifier.py b/egs/wsj/s5/steps/nnet3/fvector/generate_perturb_wav_specifier.py
new file mode 100644
index 00000000000..1e4f0775bea
--- /dev/null
+++ b/egs/wsj/s5/steps/nnet3/fvector/generate_perturb_wav_specifier.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+
+# This script generates perturbed_wav.scp from three inputs:
+# wav.scp, wav2perturbedwav and ranges
+
+# The final format is :
+# wav1 sph2pipe -f wav -p -c 1 $path/wav1.sph |
+# wav1-p1 sph2pipe -f wav -p -c 1 $path/wav1.sph | nnet3-fvector-perturb-signal
+# --noise-scp=scp:noise.scp --noise="range-p1-for-wav1" - |
+
+from __future__ import print_function
+import re, os, argparse, sys, math, warnings, random
+
+parser = argparse.ArgumentParser(description="Generate a mapping file which use to map the wav to  "
+                                 "Corresponding pertrubedwav",
+                                 epilog="Called by steps/nnet3/fvector/add_noise.sh")
+parser.add_argument("--noise", type=str,
+                    help="To assign the noise.scp. You must make sure it is same with "
+                    "the noise.scp which is used to generate range_file.")
+# now the positional arguments
+parser.add_argument("wav_scp",
+                    help="The orginial wav.scp which contains all the original wav "
+                    "The format is: <recording-id> <extended-file>.")
+parser.add_argument("range_file",
+                    help="The file contains the range information which is used to "
+                    "control the process of adding noise. The format is : "
+                    "<perturbedwavid> <range-information>.")
+parser.add_argument("wav2perturbedwav",
+                    help="This file contains the mapping between wav and perturbedwav.")
+parser.add_argument("perturbed_wav_scp",
+                    help="The file is used to store the perturbed wav sperifier.")
+
+print(' '.join(sys.argv))
+
+args = parser.parse_args()
+
+# Extract the recording ids and extended filenames from wav_scp
+wav_recording_ids = []
+wav_extended_files = []
+f = open(args.wav_scp, "r")
+if f is None:
+    sys.exit("Error opening wav.scp file")
+for line in f:
+    # NOTE: the result of this split is discarded; line.split() below already drops the trailing "\n"
+    line.split("\n")
+    a = line.split()
+    wav_recording_ids.append(a[0])
+    del a[0]
+    wav_extended_files.append(' '.join(a))
+f.close()
+
+# Extract the information from the range_file
+perturbed_range_ids = []
+perturbed_range_contents = []
+f = open(args.range_file, "r")
+if f is None:
+    sys.exit("Error opening range_file")
+for line in f:
+    # NOTE: the result of this split is discarded; line.split() below already drops the trailing "\n"
+    line.split("\n")
+    a = line.split()
+    if len(a) != 2:
+        sys.exit("Bad line \"" + line + "\" in file: " + str(args.range_file))
+    perturbed_range_ids.append(a[0])
+    perturbed_range_contents.append(a[1])
+f.close()
+
+# generate the mapping file through iterating all terms in the wav2perturbedwav
+f = open(args.wav2perturbedwav, "r")
+if f is None:
+    sys.exit("Error opening wav2perturbedwav")
+# make a store file.
+g = open(args.perturbed_wav_scp, "w")
+if g is None:
+    sys.exit("Error opening perturbed_wav_specifier")
+
+# start the loop
+for line in f:
+    # NOTE: the result of this split is discarded; line.split() below already drops the trailing "\n"
+    line.split("\n")
+    wav_list = line.split()
+    current_wav_id = wav_list[0]
+    current_wav_index = wav_recording_ids.index(current_wav_id)
+
+    # print the original wav
+    print("{0} {1}".format(current_wav_id, wav_extended_files[current_wav_index]), file=g)
+    
+    for i in range(1, len(wav_list)):
+        current_perturbed_wav_id = wav_list[i]
+        current_perturbed_wav_index = perturbed_range_ids.index(current_perturbed_wav_id)
+        print('''{0} {1} nnet3-fvector-perturb-signal --noise-scp=scp:{3} --noise=\"
+              {4}\" - |'''.format(current_perturbed_wav_id,
+                                wav_extend_files[current_wav_index],
+                                args.noise,
+                                perturbed_range_contents[current_perturbed_wav_index]),
+              file=g)
+g.close()
+f.close()
+print("Finished generating the perturb_wav.scp")

From 429c1461bc8c922802cddec1d088cd22c0eb6d03 Mon Sep 17 00:00:00 2001
From: LvHang <hanglv@nwpu-aslp.org>
Date: Sat, 31 Dec 2016 01:18:31 -0500
Subject: [PATCH 15/23] Delete generate_fixed_length_range.py

---
 .../fvector/generate_fixed_length_range.py    | 191 ------------------
 1 file changed, 191 deletions(-)
 delete mode 100644 egs/wsj/s5/steps/nnet3/fvector/generate_fixed_length_range.py

diff --git a/egs/wsj/s5/steps/nnet3/fvector/generate_fixed_length_range.py b/egs/wsj/s5/steps/nnet3/fvector/generate_fixed_length_range.py
deleted file mode 100644
index 84a41541163..00000000000
--- a/egs/wsj/s5/steps/nnet3/fvector/generate_fixed_length_range.py
+++ /dev/null
@@ -1,191 +0,0 @@
-#!/usr/bin/env python
-
-# The function use to generate range file for fvector
-# This is the fixed-length version
-# Each line of the range-file corrsponds to a kind of perturbed wav. In each line,
-# we use comma to seperate different addnoise range. The format of each addnoise
-# range is <wav_t_start>:<wav_t_end>:<noise_uttid>:<noise_t_start>:<noise_t_end>:<snr>
-
-# For <wav_t_start> <wav_t_end>
-# Except the last fragement, the length will be a fixed value T.
-
-# For <noise_uttid>
-# It is randomly selected from noise list, which is longer than --min-additive-noise-len
-
-# For <noise_t_start> <noise_t_end>
-# If the noise file is longer than fixed value. We randomly select the start point and 
-# the length will be fixed value T.
-# If the noise file is shorter than T. We select the whole noise.
-
-# The <snr> control the rate of signal and noise. In the other word, scale the amplitude of noise.
-# The snr will be randomly selected form the range (max-snr, min-snr).
-
-from __future__ import print_function
-import re, os, argparse, sys, math, warnings, random
-
-parser = argparse.ArgumentParser(description="Generate a noise range files which contains "
-                                 "N lines corresponding to the number of kinds for each original wav. "
-                                 "The file created by this python code will be supplied to fixed-length add "
-                                 "additive noise program.",
-                                 epilog="Called by steps/nnet3/fvector/add_noise.sh")
-parser.add_argument("--num-kind-range", type=int, default=4,
-                    help="the number of expected addnoise kinds")
-parser.add_argument("--min-additive-noise-len", type=float, default=2.0,
-                    help="the minimum duration/length of each noise file")
-parser.add_argument("--min-snr", type=int, default=0,
-                    help="the minimum Signal-to-Noise Rate, the default=0")
-parser.add_argument("--max-snr", type=int, default=-10,
-                    help="the maximum Signal-to-Noise Rate, the default=-10")
-parser.add_argument("--seed", type=int, default=-1,
-                    help="Seed for random number generator")
-
-# now the positional arguments
-parser.add_argument("wav2dur",
-                    help="wav2dur file of the original wav to be used as input (format is: "
-                    "<utterance-id> <duration>")
-parser.add_argument("noise2dur",
-                    help="noise2dur file of the noise wav to be used as input (format is: "
-                    "<utterance-id> <duration>")
-parser.add_argument("range_dir",
-                    help="Name of ranges directory, exp/fxvector/ranges")
-
-print(' '.join(sys.argv))
-
-args = parser.parse_args()
-
-## Check arguments
-if args.min_snr < args.max_snr:
-    sys.exit("For SNR, the less numerical value is, the larger noise is. So --min-snr bigger "
-             "than --max-snr in numerical value.")
-
-random.seed(args.seed)
-
-# deal with the original wav utt2dur
-# the information was stored in wav_ids[], wav_lengths[] and wav_num_utts
-f = open(args.wav2dur, "r")
-if f is None:
-    sys.exit("Error opening wav2dur file " + str(args.wav2dur))
-wav_ids = []
-wav_lengths = []
-for line in f:
-    a = line.split()
-    if len(a) != 2:
-        sys.exit("Bad line in wav2dur file " + line)
-    if float(a[1]) < args.min_additive_noise_len:
-	sys.exit("ERROR: The wav length is shorter than --min-additive-noise-len")
-    wav_ids.append(a[0])
-    wav_lengths.append(float(a[1]))
-f.close()
-
-wav_num_utts = len(wav_ids)
-
-# deal with the noise wav utt2dur
-# remove the noise whose length < --min-additive-noise-len
-num_error = 0
-num_done = 0
-f = open(args.noise2dur, "r")
-if f is None:
-    sys.exit("Error opening noise2dur file " + str(args.noise2dur))
-noise_ids = []
-noise_lengths = []
-for line in f:
-    a = line.split()
-    if len(a) != 2:
-        sys.exit("Bad line in noise2dur file " + line);
-    if float(a[1]) < args.min_additive_noise_len:
-        num_error += 1
-        continue
-    noise_ids.append(a[0])
-    noise_lengths.append(float(a[1]))
-    num_done += 1
-f.close()
-noise_num_utts = len(noise_ids)
-noise_str =  "Warning: There are " + str(num_error) + " noise files length smaller than " + \
-             "--min-additive-noise-len, we remove it from the noise list. Now, there are " + \
-             str(num_done) + " noise file."
-sys.stdout.write( noise_str + '\n')
-
-num_fixed_error = 0
-num_fixed_done = 0
-
-for i in range(0, wav_num_utts):
-    # decide the number of noises which will be add to 
-    current_wav_len = wav_lengths[i]
-    max_num_additive_noise = int(current_wav_len / args.min_additive_noise_len)
-   
-    if max_num_additive_noise > noise_num_utts:
-        print( "Warning: The number of noise files or the --min-additive-noise-len is too small" )
-        num_fixed_error += 1
-        continue
-
-    # create a file to record the ranges
-    f = open(args.range_dir + "/" + str(wav_ids[i]) + ".noiserange", "w")
-    if f is None:
-        sys.exit("Error open file " + args.range_dir + "/" + str(wav_ids[i]) + ".noiserange")
-
-    # We generate $num_kind_range ranges
-    for j in range(0, args.num_kind_range):
-        # print the perturbed wav id in the beginning of line
-        print("{0}-{1}".format(wav_ids[i], "perturbed-"+str(j+1)), end=" ", file=f)
-            
-        # select a number from [1 ... max_num_additive_noise]
-        num_additive_noise = random.randint(1, max_num_additive_noise)
-    
-        # decide the length of each noise, minus 0.01 to prevent overstep
-        additive_noise_len = float('{:.2f}'.format(current_wav_len / num_additive_noise)) - 0.01
-
-        # generate one line of file
-        # format: wav_t_start:wav_t_end:noise_name:noise_t_start:noise_t_end:snr,
-        for k in range(0, num_additive_noise - 1):
-            wav_t_start = float('{:.2f}'.format(k * additive_noise_len))
-            wav_t_end = wav_t_start + additive_noise_len
-			
-            noise_index = random.randrange(0, noise_num_utts)
-            current_noise_name = noise_ids[noise_index]
-            current_noise_len = noise_lengths[noise_index]
-            if current_noise_len <= additive_noise_len:
-                noise_t_start = 0.0
-                noise_t_end = current_noise_len
-	    else :
-	        noise_start_bound = float('{:.2f}'.format(current_noise_len - additive_noise_len))
-                noise_t_start = float('{:.2f}'.format(random.uniform(0, noise_start_bound)))
-                noise_t_end = noise_t_start + additive_noise_len
-
-            current_snr = random.randrange(args.max_snr, args.min_snr)
-
-            print("{0}:{1}:{2}:{3}:{4}:{5}".format(wav_t_start,
-                                                   wav_t_end,
-                                                   current_noise_name,
-                                                   noise_t_start,
-                                                   noise_t_end,
-                                                   current_snr),
-                  end=",",file=f)
-	# deal with the last noise, which cover the rest
-        k = num_additive_noise - 1
-	wav_t_start = float('{:.2f}'.format(k * additive_noise_len))
-        wav_t_end = current_wav_len
-
-	noise_index = random.randrange(0, noise_num_utts)
-        current_noise_name = noise_ids[noise_index]
-        current_noise_len = noise_lengths[noise_index]
-
-	if current_noise_len <= (wav_t_end - wav_t_start):
-	    noise_t_start = 0.0
-	    noise_t_end = current_noise_len
-	else :
-	    noise_start_bound = float('{:.2f}'.format(current_noise_len - wav_t_end + wav_t_start))
-            noise_t_start = float('{:.2f}'.format(random.uniform(0, noise_start_bound)))
-            noise_t_end = noise_t_start + wav_t_end - wav_t_start		
-		
-	current_snr = random.randrange(args.max_snr, args.min_snr)
-
-        print("{0}:{1}:{2}:{3}:{4}:{5}".format(wav_t_start,
-                                               wav_t_end,
-                                               current_noise_name,
-                                               noise_t_start,
-                                               noise_t_end,
-                                               current_snr),
-              file=f)
-	num_fixed_done += 1
-    f.close()
-print('''generate_fixed_length_range.py: finished generate the range files for all wav. Compare with our expect, it lacks %d files. Now we totally have %d noise range files.''' %(num_fixed_error, num_fixed_done) )

From ffca370f2ef1d1cc9552197877e625b13ae05adb Mon Sep 17 00:00:00 2001
From: LvHang <hanglv@nwpu-aslp.org>
Date: Sat, 31 Dec 2016 01:18:40 -0500
Subject: [PATCH 16/23] Delete generate_variable_length_range.py

---
 .../fvector/generate_variable_length_range.py | 198 ------------------
 1 file changed, 198 deletions(-)
 delete mode 100644 egs/wsj/s5/steps/nnet3/fvector/generate_variable_length_range.py

diff --git a/egs/wsj/s5/steps/nnet3/fvector/generate_variable_length_range.py b/egs/wsj/s5/steps/nnet3/fvector/generate_variable_length_range.py
deleted file mode 100644
index ef0fe9e02a2..00000000000
--- a/egs/wsj/s5/steps/nnet3/fvector/generate_variable_length_range.py
+++ /dev/null
@@ -1,198 +0,0 @@
-#!/usr/bin/env python
-
-# The function use to generate range file for fvector
-# This is the variable-length version
-# Each line of the range-file corrsponds to a kind of perturbed wav. In each line,
-# we use comma to seperate different addnoise range. The format of each addnoise
-# range is <wav_t_start>:<wav_t_end>:<noise_uttid>:<noise_t_start>:<noise_t_end>:<snr>
-
-# For <wav_t_start> <wav_t_end>
-# Except the last fragement, the length will be random.
-
-# For <noise_uttid>
-# It is randomly selected from noise list, which is longer than --min-additive-noise-len
-
-# For <noise_t_start> <noise_t_end>
-# If the noise file is longer than wav length. We randomly select the start point and 
-# the length will be the same as wav length.
-# If the noise file is shorter than T. We select the whole noise.
-
-# For <snr>, it was used to control the amplitude of noise
-# It will be randomly selected from the range (max-snr, min-snr)
-
-from __future__ import print_function
-import re, os, argparse, sys, math, warnings, random
-
-parser = argparse.ArgumentParser(description="Generate a noise range-file which cotains "
-                                 "N lines corresponding to the number of kinds for each original wav. "
-                                 "The file created by this python code will be supplied to variable-length "
-                                 "and additive noise program.",
-                                 epilog="Called by steps/nnet3/fvector/add_noise.sh")
-parser.add_argument("--num-kind-range", type=int, default=4,
-                    help="the number of noise range files")
-parser.add_argument("--min-additive-noise-len", type=float, default=2.0,
-                    help="the minimum duration/length of each noise file")
-parser.add_argument("--min-snr", type=int, default=0,
-                    help="the minimum Signal-to-Noise Rate, the default=0")
-parser.add_argument("--max-snr", type=int, default=-10,
-                    help="the maximum Signal-to-Noise Rate, the default=-10")
-parser.add_argument("--seed", type=int, default=-1,
-                    help="Seed for random number generator")
-
-# now the positional arguments
-parser.add_argument("wav2dur",
-                    help="wav2dur file of the original wav to be used as input (format is: "
-                    "<utterance-id> <duration>")
-parser.add_argument("noise2dur",
-                    help="noise2dur file of the noise wav to be used as input (format is: "
-                    "<utterance-id> <duration>")
-parser.add_argument("range_dir",
-                    help="Name of ranges directory, exp/fxvector/ranges")
-
-print(' '.join(sys.argv))
-
-args = parser.parse_args()
-
-## Check arguments
-if args.min_snr < args.max_snr:
-    sys.exit("For SNR, the less numerical value is, the larger noise is. So --min-snr bigger "
-             "than --max-snr in numerical value.")
-
-random.seed(args.seed)
-
-# deal with the original wav utt2dur
-# the information was stored in wav_ids[], wav_lengths[] and wav_num_utts
-f = open(args.wav2dur, "r")
-if f is None:
-    sys.exit("Error opening wav2dur file " + str(args.wav2dur))
-wav_ids = []
-wav_lengths = []
-for line in f:
-    a = line.split()
-    if len(a) != 2:
-        sys.exit("Bad line in wav2dur file " + line)
-	if float(a[1]) < args.min_additive_noise_len:
-	    sys.exit("ERROR: The wav length is shorter than --min-additive-noise-len")
-    wav_ids.append(a[0])
-    wav_lengths.append(float(a[1]))
-f.close()
-
-wav_num_utts = len(wav_ids)
-
-# deal with the noise wav utt2dur
-# remove the noise whose length < --min-additive-noise-len
-num_error = 0
-num_done = 0
-f = open(args.noise2dur, "r")
-if f is None:
-    sys.exit("Error opening wav2dur file " + str(args.noise2dur))
-noise_ids = []
-noise_lengths = []
-for line in f:
-    a = line.split()
-    if len(a) != 2:
-        sys.exit("bad line in noise2dur file " + line);
-    if float(a[1]) < args.min_additive_noise_len:
-        num_error += 1
-        continue
-    noise_ids.append(a[0])
-    noise_lengths.append(float(a[1]))
-    num_done += 1
-f.close()
-noise_num_utts = len(noise_ids)
-noise_str =  "Warning: There are " + str(num_error) + " noise files length smaller than " + \
-             "--min-additive-noise-len, we remove it from the noise list. Now, there are " + \
-             str(num_done) + " noise file."
-sys.stdout.write( noise_str + '\n')
-
-num_error = 0
-num_done = 0
-# generate the range file for each original wav file
-num_variable_error = 0
-num_variable_done = 0
-
-for i in range(0, wav_num_utts):
-
-    # check the noise list has enough sample or not
-    current_wav_len = wav_lengths[i]
-    max_num_additive_noise = int(current_wav_len / args.min_additive_noise_len)
-    
-    if max_num_additive_noise > noise_num_utts:
-        print( "Warning: The number of noise files or the --min-additive-noise-len is too small" )
-        num_variable_error += 1
-        continue
-
-    # create a file to record the ranges
-    f = open(args.range_dir + "/" + str(wav_ids[i]) + ".noiserange", "w")
-    if f is None:
-        sys.exit("Error open file " + args.range_dir + "/" + str(wav_ids[i]) + ".noiserange")
-       
-    # We generate $num_kind_range ranges
-    for j in range(0, args.num_kind_range):
-        # print the perturbed wav id in the beginning of line
-        print("{0}-{1}".format(wav_ids[i], "perturbed-"+str(j+1)), end=" ", file=f)
-
-        # generate range file
-        # format: wav_t_start:wav_t_end:noise_name:noise_t_start:noise_t_end:snr,
-        the_rest = current_wav_len
-        wav_t_start = 0.0
-        wav_t_end = 0.0
-        while (the_rest > float(args.min_additive_noise_len)):
-            # firstly, we randomly choose a kind of noise and snr
-            noise_index = random.randrange(0, noise_num_utts)
-            current_noise_name = noise_ids[noise_index]
-            current_noise_len = noise_lengths[noise_index]
-	    current_snr = random.randrange(args.max_snr, args.min_snr)
-			
-            # Secondly, we randomly select a fragement of the noise file.
-            noise_start_bound = float('{:.2f}'.format(current_noise_len - float(args.min_additive_noise_len)))
-            noise_t_start = float('{:.2f}'.format(random.uniform(0, noise_start_bound)))
-            noise_end_upperbound = float('{:.2f}'.format(noise_t_start + float(args.min_additive_noise_len)))
-            noise_end_lowerbound = float('{:.2f}'.format(min((noise_t_start + the_rest), current_noise_len)))
-            noise_t_end = float('{:.2f}'.format(random.uniform(noise_end_upperbound, noise_end_lowerbound)))
-            current_noise_length = noise_t_end - noise_t_start
-			
-            # Thirdly, we generate the start and end point of wav
-            wav_t_start = wav_t_end #the new start is the end of the last.
-            wav_t_end = wav_t_start + current_noise_length
-			
-            # Forthly, update the_rest
-            the_rest = the_rest - current_noise_length
-	    
-            # Fifthly, print
-            print("{0}:{1}:{2}:{3}:{4}:{5}".format(wav_t_start,
-                                                   wav_t_end,
-                                                   current_noise_name,
-                                                   noise_t_start,
-                                                   noise_t_end,
-                                                   current_snr),
-                  end=",",file=f)
-        # deal with the bit of wav
-        # firstly, we randomly choose a kind of noise and snr
-	noise_index = random.randrange(0, noise_num_utts)
-        current_noise_name = noise_ids[noise_index]
-        current_noise_len = noise_lengths[noise_index]
-	current_snr = random.randrange(args.max_snr, args.min_snr)
-		
-	# Secondly, we randomly select a fragement of the noise file.
-        noise_start_bound = float('{:.2f}'.format(current_noise_len - the_rest))
-        noise_t_start = float('{:.2f}'.format(random.uniform(0, noise_start_bound)))
-	noise_t_end = noise_t_start + the_rest
-	current_noise_length = noise_t_end - noise_t_start
-	
-	# Thirdly, we generate the start and end point of wav
-        wav_t_start = wav_t_end #the new start is the end of the last.
-	wav_t_end = wav_t_start + current_noise_length
-	
-	# Forthly, print
-        print("{0}:{1}:{2}:{3}:{4}:{5}".format(wav_t_start,
-                                               wav_t_end,
-                                               current_noise_name,
-                                               noise_t_start,
-                                               noise_t_end,
-                                               current_snr),
-              file=f)		
-	num_variable_done += 1
-    f.close()
-print('''generate_fixed_length_range.py: finished generate the range files for all wav. Compare with our expect, it lacks %d files. Now we totally have %d noise range files.''' %(num_variable_error, num_variable_done) )
-

From 8a9febf1b0d73cbf9b96389f704fd738e4ed2080 Mon Sep 17 00:00:00 2001
From: LvHang <hanglv@nwpu-aslp.org>
Date: Sat, 31 Dec 2016 01:18:46 -0500
Subject: [PATCH 17/23] Delete lh_add_noise.sh

---
 .../s5/steps/nnet3/fvector/lh_add_noise.sh    | 41 -------------------
 1 file changed, 41 deletions(-)
 delete mode 100644 egs/wsj/s5/steps/nnet3/fvector/lh_add_noise.sh

diff --git a/egs/wsj/s5/steps/nnet3/fvector/lh_add_noise.sh b/egs/wsj/s5/steps/nnet3/fvector/lh_add_noise.sh
deleted file mode 100644
index 6b81af6c6ec..00000000000
--- a/egs/wsj/s5/steps/nnet3/fvector/lh_add_noise.sh
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/bin/bash
-
-# Begin Configuration section
-stage=0
-min_additive_noise_len=2 # the minimum duration of each noise file
-num_kind_range=4         # the number of kinds of noise ranges
-min_snr=0                # the minimum snr value
-max_snr=0                # the maximum snr value
-seed=-1                  # set the random seed
-
-# End Configuration section
-
-data=$1  # contain wav.scp
-noise=$2 # contain noise.scp 
-dir=$3   # eg: ranges/
-
-
-if [ ! -f $data/utt2dur ]; then
-  # get original clean wav's duration
-  utils/data/get_utt2dur.sh $data 
-fi
-
-if [ ! -f $noise/utt2dur ]; then
-  # get the duration of each noise file
-  utils/data/get_utt2dur.sh $noise
-fi
-
-mkdir -p $dir/log
-if [ $stage -le 0 ]; then
-  echo "$0: generate $num_kind_rage kinds of noise range for each original wav"
-  $cmd $dir/log/generate_noise_range.log \
-    steps/nnet3/fvector/generate_noise_range.py \
-      --num-kind-range=$num_kind_range \
-      --min-additive-noise-len=$min_additive_noise_len \
-      --min-snr=$min_snr \
-      --max-snr=$max_snr \
-      --seed=$seed \
-      $data/utt2dur $noise/utt2dur $dir
-fi
-
-exit 0

From 954fa90313dacb1d10bd791c04dbf463b59194e5 Mon Sep 17 00:00:00 2001
From: Hang Lyu <hlyu@b01.clsp.jhu.edu>
Date: Mon, 2 Jan 2017 22:00:04 -0500
Subject: [PATCH 18/23] fix utt-id != recording-id

---
 egs/wsj/s5/steps/nnet3/fvector/add_noise.sh   | 27 +++++++++++++++++--
 .../nnet3/fvector/generate_noise_range.py     | 12 ++++-----
 .../fvector/generate_perturb_wav_specifier.py | 11 ++++----
 3 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh b/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
index efd648bcbe0..d7836fb5f01 100644
--- a/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
+++ b/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
@@ -40,17 +40,31 @@ fi
 
 data=$1  # contain wav.scp
 noise=$2 # contain noise.scp 
-dir=$3   # eg: ranges/
+dir=$3   # eg: data/perturbed
 
 
 if [ ! -f $data/utt2dur ]; then
+  # remove the segments so that the duration corresponding to recording-id
+  if [ -f $data/segments ]; then
+    mv $data/segments $data/segments_backup
+  fi
   # get original clean wav's duration
-  utils/data/get_utt2dur.sh $data 
+  utils/data/get_utt2dur.sh $data
+  if [ -f $data/segments_backup ]; then
+    mv $data/segments_backup segments
+  fi
 fi
 
 if [ ! -f $noise/utt2dur ]; then
+  # remove the segments so that the duration corresponding to recording-id
+  if [ -f $data/segments ]; then
+    mv $data/segments $data/segments_backup
+  fi
   # get the duration of each noise file
   utils/data/get_utt2dur.sh $noise
+  if [ -f $data/segments_backup ]; then
+    mv $data/segments_backup segments
+  fi
 fi
 
 mkdir -p $dir/log
@@ -65,6 +79,15 @@ if [ $stage -le 0 ]; then
       --variable-len-additive-noise $variable_len_additive_noise \
       --seed=$seed \
       $data/utt2dur $noise/utt2dur $dir/ranges $dir/wav2perturbedwav
+  #if the segments is exist
+
 fi
 
+if [ $stage -le 1 ]; then
+  echo "$0: generate perturbed_wav_specifier"
+  $cmd $dir/log/generate_perturb_wav_specifier.log \
+    steps/nnet3/fvector/generate_perturb_wav_specifier.py \
+      --noise=$noise/wav.scp \
+      $data/wav.scp $dir/ranges $dir/wav2perturbedwav $dir/perturbed_wav.scp
+fi
 exit 0
diff --git a/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py b/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py
index 714f187b896..ed147b27d40 100644
--- a/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py
+++ b/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py
@@ -144,10 +144,10 @@ def GenerateFixedLengthRangeFile():
         # We generate $num_ranges_per_wav ranges
         for j in range(0, args.num_ranges_per_wav):
             # print the perturbed wav id in the beginning of line
-            print("{0}-{1}".format(wav_ids[i], "perturbed-"+str(j+1)), end=" ", file=f)
+            print("{1}-{0}".format(wav_ids[i], "perturbed"+str(j+1)), end=" ", file=f)
 
             # print the perturbedwav_id
-            print(" {0}-{1}".format(wav_ids[i], "perturbed-"+str(j+1)), end="", file=g)
+            print(" {1}-{0}".format(wav_ids[i], "perturbed"+str(j+1)), end="", file=g)
 
             # select a number from [1 ... max_num_additive_noise]
             num_additive_noise = random.randint(1, max_num_additive_noise)
@@ -184,7 +184,7 @@ def GenerateFixedLengthRangeFile():
 	    # deal with the last noise, which cover the rest
             k = num_additive_noise - 1
 	    wav_t_start = float('{:.2f}'.format(k * additive_noise_len))
-            wav_t_end = current_wav_len
+            wav_t_end = float('{:.2f}'.format(current_wav_len))
 
 	    noise_index = random.randrange(0, num_noise)
             current_noise_name = noise_ids[noise_index]
@@ -246,14 +246,14 @@ def GenerateVariableLengthRangeFile():
         # We generate $num_ranges_per_wav ranges
         for j in range(0, args.num_ranges_per_wav):
             # print the perturbed wav id in the beginning of line
-            print("{0}-{1}".format(wav_ids[i], "perturbed-"+str(j+1)), end=" ", file=f)
+            print("{1}-{0}".format(wav_ids[i], "perturbed"+str(j+1)), end=" ", file=f)
             
             # print the perturbedwav_id
-            print(" {0}-{1}".format(wav_ids[i], "perturbed-"+str(j+1)), end="", file=g)
+            print(" {1}-{0}".format(wav_ids[i], "perturbed"+str(j+1)), end="", file=g)
 
             # generate range file
             # format: wav_t_start:wav_t_end:noise_name:noise_t_start:noise_t_end:snr,
-            the_rest = current_wav_len
+            the_rest = float('{:.2f}'.format(current_wav_len))
             wav_t_start = 0.0
             wav_t_end = 0.0
             while (the_rest > float(args.min_additive_noise_len)):
diff --git a/egs/wsj/s5/steps/nnet3/fvector/generate_perturb_wav_specifier.py b/egs/wsj/s5/steps/nnet3/fvector/generate_perturb_wav_specifier.py
index 1e4f0775bea..58892782f05 100644
--- a/egs/wsj/s5/steps/nnet3/fvector/generate_perturb_wav_specifier.py
+++ b/egs/wsj/s5/steps/nnet3/fvector/generate_perturb_wav_specifier.py
@@ -88,12 +88,11 @@
     for i in range(1, len(wav_list)):
         current_perturbed_wav_id = wav_list[i]
         current_perturbed_wav_index = perturbed_range_ids.index(current_perturbed_wav_id)
-        print('''{0} {1} nnet3-fvector-perturb-signal --noise-scp=scp:{3} --noise=\"
-              {4}\" - |'''.format(current_perturbed_wav_id,
-                                wav_extend_files[current_wav_index],
-                                args.noise,
-                                perturbed_range_contents[current_perturbed_wav_index]),
-              file=g)
+        print('''{0} {1} nnet3-fvector-perturb-signal --noise-scp=scp:{2} --noise=\"{3}\" - |'''.format(
+            current_perturbed_wav_id,
+            wav_extended_files[current_wav_index],
+            args.noise,
+            perturbed_range_contents[current_perturbed_wav_index]),file=g)
 g.close()
 f.close()
 print("Finished generating the perturb_wav.scp")

From 5ad323d6207c2e46beba9b858f3600a355fd5604 Mon Sep 17 00:00:00 2001
From: Hang Lyu <hlyu@b01.clsp.jhu.edu>
Date: Mon, 2 Jan 2017 22:28:47 -0500
Subject: [PATCH 19/23] fix add_noise.sh

---
 egs/wsj/s5/steps/nnet3/fvector/add_noise.sh | 56 ++++++++++++++-------
 1 file changed, 37 insertions(+), 19 deletions(-)

diff --git a/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh b/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
index d7836fb5f01..b2f6fd7edb1 100644
--- a/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
+++ b/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
@@ -42,31 +42,41 @@ data=$1  # contain wav.scp
 noise=$2 # contain noise.scp 
 dir=$3   # eg: data/perturbed
 
-
-if [ ! -f $data/utt2dur ]; then
-  # remove the segments so that the duration corresponding to recording-id
-  if [ -f $data/segments ]; then
-    mv $data/segments $data/segments_backup
-  fi
-  # get original clean wav's duration
-  utils/data/get_utt2dur.sh $data
-  if [ -f $data/segments_backup ]; then
-    mv $data/segments_backup segments
+# remove the segments so that the duration corresponding to recording-id
+if [ -f $data/segments ]; then
+  mv $data/segments $data/segments_backup
+  if [ -f $data/utt2dur ]; then
+    mv $data/utt2dur $data/utt2dur.backup
+    utils/data/get_utt2dur.sh $data
+  else if
+    utils/data/get_utt2dur.sh $data
   fi
+  mv $data/segments_backup $data/segments
+else if
+  if [ ! -f $data/utt2dur ]; then
+    # get original clean wav's duration
+    utils/data/get_utt2dur.sh $data
+  fi 
 fi
 
-if [ ! -f $noise/utt2dur ]; then
-  # remove the segments so that the duration corresponding to recording-id
-  if [ -f $data/segments ]; then
-    mv $data/segments $data/segments_backup
-  fi
-  # get the duration of each noise file
-  utils/data/get_utt2dur.sh $noise
-  if [ -f $data/segments_backup ]; then
-    mv $data/segments_backup segments
+# remove the segments so that the duration corresponding to recording-id
+if [ -f $noise/segments ]; then
+  mv $noise/segments $noise/segments_backup
+  if [ -f $noise/utt2dur ]; then
+    mv $noise/utt2dur $noise/utt2dur.backup
+    utils/data/get_utt2dur.sh $noise
+  else if
+    utils/data/get_utt2dur.sh $noise
   fi
+  mv $noise/segments_backup $noise/segments
+else if
+  if [ ! -f $noise/utt2dur ]; then
+    # get original clean wav's duration
+    utils/data/get_utt2dur.sh $noise
+  fi 
 fi
 
+
 mkdir -p $dir/log
 if [ $stage -le 0 ]; then
   echo "$0: generate $num_kind_rage kinds of noise range for each original wav"
@@ -90,4 +100,12 @@ if [ $stage -le 1 ]; then
       --noise=$noise/wav.scp \
       $data/wav.scp $dir/ranges $dir/wav2perturbedwav $dir/perturbed_wav.scp
 fi
+
+if [ -f $data/utt2dur.backup ]; then
+  mv $data/utt2dur.backup $data/utt2dur
+fi
+if [ -f $noise/utt2dur.backup ]; then
+  mv $noise/utt2dur.backup $noise/utt2dur
+fi
+
 exit 0

From cef219cf62b3ae9abe7007e86b15c48c9afa0898 Mon Sep 17 00:00:00 2001
From: Hang Lyu <hlyu@b01.clsp.jhu.edu>
Date: Tue, 3 Jan 2017 22:47:41 -0500
Subject: [PATCH 20/23] generate a intergrated dir

---
 egs/wsj/s5/steps/nnet3/fvector/add_noise.sh   |  46 ++++++--
 .../steps/nnet3/fvector/apply_map_one2mult.pl | 111 ++++++++++++++++++
 .../nnet3/fvector/generate_noise_range.py     |   0
 .../fvector/generate_perturb_wav_specifier.py |   0
 src/fvectorbin/Makefile                       |   2 +-
 5 files changed, 151 insertions(+), 8 deletions(-)
 mode change 100644 => 100755 egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
 create mode 100755 egs/wsj/s5/steps/nnet3/fvector/apply_map_one2mult.pl
 mode change 100644 => 100755 egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py
 mode change 100644 => 100755 egs/wsj/s5/steps/nnet3/fvector/generate_perturb_wav_specifier.py

diff --git a/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh b/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
old mode 100644
new mode 100755
index b2f6fd7edb1..9d40d074a3e
--- a/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
+++ b/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
@@ -48,11 +48,11 @@ if [ -f $data/segments ]; then
   if [ -f $data/utt2dur ]; then
     mv $data/utt2dur $data/utt2dur.backup
     utils/data/get_utt2dur.sh $data
-  else if
+  else
     utils/data/get_utt2dur.sh $data
   fi
   mv $data/segments_backup $data/segments
-else if
+else
   if [ ! -f $data/utt2dur ]; then
     # get original clean wav's duration
     utils/data/get_utt2dur.sh $data
@@ -65,11 +65,11 @@ if [ -f $noise/segments ]; then
   if [ -f $noise/utt2dur ]; then
     mv $noise/utt2dur $noise/utt2dur.backup
     utils/data/get_utt2dur.sh $noise
-  else if
+  else
     utils/data/get_utt2dur.sh $noise
   fi
   mv $noise/segments_backup $noise/segments
-else if
+else
   if [ ! -f $noise/utt2dur ]; then
     # get original clean wav's duration
     utils/data/get_utt2dur.sh $noise
@@ -89,8 +89,6 @@ if [ $stage -le 0 ]; then
       --variable-len-additive-noise $variable_len_additive_noise \
       --seed=$seed \
       $data/utt2dur $noise/utt2dur $dir/ranges $dir/wav2perturbedwav
-  #if the segments is exist
-
 fi
 
 if [ $stage -le 1 ]; then
@@ -98,7 +96,41 @@ if [ $stage -le 1 ]; then
   $cmd $dir/log/generate_perturb_wav_specifier.log \
     steps/nnet3/fvector/generate_perturb_wav_specifier.py \
       --noise=$noise/wav.scp \
-      $data/wav.scp $dir/ranges $dir/wav2perturbedwav $dir/perturbed_wav.scp
+      $data/wav.scp $dir/ranges $dir/wav2perturbedwav $dir/wav.scp
+fi
+
+if [ $stage -le 2 ]; then
+  echo "$0: generate other files in data directory"
+  #reco2file_and_channel
+  cat $dir/wav2perturbedwav | cut -d ' ' -f 1 | paste -d ' ' - $dir/wav2perturbedwav > $dir/perturb_recording_map
+  steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/perturb_recording_map <$data/reco2file_and_channel >$dir/reco2file_and_channel
+  if [ -f $data/segments ]; then
+    awk -v num=$num_ranges_per_wav '{
+      printf("%s %s",$1, $1);
+      for(i=1; i<= num; i++){ printf(" %s%s-%s","perturb", i, $1); }
+      printf("\n");
+    }' <$data/segments > $dir/perturb_utt_map
+    cat $dir/perturb_recording_map > $dir/perturb_map
+    cat $dir/perturb_utt_map >> $dir/perturb_map
+    #segments
+    steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/perturb_map <$data/segments >$dir/segments
+    #text
+    steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/perturb_map <$data/text >$dir/text
+    #utt2spk
+    steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/perturb_map <$data/utt2spk >$dir/utt2spk
+    #spk2utt
+    utt2spk_to_spk2utt.pl <$dir/utt2spk | sort > $dir/spk2utt
+  else #no segments->wav indexed by utterence-id/<recording-id> is equal to <utt-id>
+    cp $dir/perturb_recording_map $dir/perturb_map
+    #segments
+    steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/perturb_map <$data/segments >$dir/segments
+    #text
+    steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/perturb_map <$data/text >$dir/text
+    #utt2spk
+    steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/perturb_map <$data/utt2spk >$dir/utt2spk
+    #spk2utt
+    utt2spk_to_spk2utt.pl <$dir/utt2spk | sort > $dir/spk2utt
+  fi
 fi
 
 if [ -f $data/utt2dur.backup ]; then
diff --git a/egs/wsj/s5/steps/nnet3/fvector/apply_map_one2mult.pl b/egs/wsj/s5/steps/nnet3/fvector/apply_map_one2mult.pl
new file mode 100755
index 00000000000..fbf92e10331
--- /dev/null
+++ b/egs/wsj/s5/steps/nnet3/fvector/apply_map_one2mult.pl
@@ -0,0 +1,111 @@
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
+# Copyright 2016  Johns Hopkins University (Author: Daniel Povey)
+# Apache 2.0.
+
+# This program solves the following problem:
+# Assume the map is A A1 A2 A3 A4
+# The input is A B C D
+# The output is A1 B C D \n A2 B C D \n A3 B C D \n A4 B C D \n
+# This is a one-to-many mapping.
+
+# Note: mapped values are joined with ":" internally and every field is split on ":" when printing, so no map value or input field may contain ":".
+
+
+if (@ARGV > 0 && $ARGV[0] eq "-f") {  # optional "-f <field-range>": which (1-based) input fields to map
+  shift @ARGV;
+  $field_spec = shift @ARGV;  # e.g. "1", "2-4", "3-" or "-5"
+  if ($field_spec =~ m/^\d+$/) {  # a single field: begin and end coincide
+    $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
+  }
+  if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10)
+    if ($1 ne "") {
+      $field_begin = $1 - 1;    # Change to zero-based indexing.
+    }
+    if ($2 ne "") {
+      $field_end = $2 - 1;      # Change to zero-based indexing.
+    }
+  }
+  if (!defined $field_begin && !defined $field_end) {  # neither bound could be parsed from the spec
+    die "Bad argument to -f option: $field_spec";  # abort: the spec matched none of the forms above
+  }
+}
+
+# Mapping is obligatory
+$permissive = 0;
+if (@ARGV > 0 && $ARGV[0] eq '--permissive') {
+  shift @ARGV;
+  # Mapping is optional (missing key is printed to output)
+  $permissive = 1;
+}
+
+if(@ARGV != 1) {
+  print STDERR "Invalid usage: " . join(" ", @ARGV) . "\n";
+  print STDERR "Usage: apply_map_one2mult.pl [options] map <input >output\n" .
+    "options: [-f <field-range> ]\n" .
+    "Applies the map 'map' to all input text, where each line of the map\n" .
+    "is interpreted as a map from the first field to the list of the other fields\n" .
+    "Note: <field-range> can look like 4-5, or 4-, or 5-, or 1, it means the field\n" .
+    "range in the input to apply the map to.\n" .
+    "e.g.: echo A B | apply_map.pl a.txt\n" .
+    "where a.txt is:\n" .
+    "A A1 A2\n" .
+    "B B1\n" .
+    "will produce:\n" .
+    "A1 B1\n" .
+    "A2 B1\n";
+  exit(1);
+}
+
+($map) = @ARGV;
+open(M, "<$map") || die "Error opening map file $map: $!";
+
+while (<M>) {
+  @A = split(" ", $_);
+  @A >= 1 || die "apply_map.pl: empty line.";
+  $i = shift @A;
+  $o = join(":", @A);
+  $map{$i} = $o;
+}
+
+sub printcontent {  # printcontent($start, @fields): expand ":"-joined alternatives from field $start on, printing one line per combination
+  (my $start, my @string)=@_;
+
+  if ( $start == @string ) { print join(" ",@string) . "\n";  # every field has been resolved: emit one output line
+  } else {
+    my $tmp = $string[$start];
+    my @Word = split(":", $tmp);  # the alternatives packed into this field
+    if ( @Word != 1) {  # not exactly one piece (normally several alternatives): recurse once per piece
+      foreach(@Word) {
+        $string[$start] = $_;
+        $start++;
+        &printcontent($start, @string);
+        $start--;  # restore the index so the next alternative replaces the same field
+      }
+    } else {  # a plain field: leave it as it is and move on to the next one
+      $start++;
+      &printcontent($start, @string);
+    }
+  }
+}
+
+while(<STDIN>) {
+  @A = split(" ", $_);
+  for ($x = 0; $x < @A; $x++) {
+    if ( (!defined $field_begin || $x >= $field_begin)
+         && (!defined $field_end || $x <= $field_end)) {
+      $a = $A[$x];
+      if (!defined $map{$a}) {
+        if (!$permissive) {
+          die "apply_map.pl: undefined key $a\n"; 
+        } else {
+          print STDERR "apply_map.pl: warning! missing key $a\n";
+        }
+      } else {
+        $A[$x] = $map{$a}; 
+      }
+    }
+  }
+  # print the content
+  &printcontent(0,@A);
+}
diff --git a/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py b/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py
old mode 100644
new mode 100755
diff --git a/egs/wsj/s5/steps/nnet3/fvector/generate_perturb_wav_specifier.py b/egs/wsj/s5/steps/nnet3/fvector/generate_perturb_wav_specifier.py
old mode 100644
new mode 100755
diff --git a/src/fvectorbin/Makefile b/src/fvectorbin/Makefile
index 7d826881cf6..73c81a4bbb6 100644
--- a/src/fvectorbin/Makefile
+++ b/src/fvectorbin/Makefile
@@ -6,7 +6,7 @@ include ../kaldi.mk
 LDFLAGS += $(CUDA_LDFLAGS)
 LDLIBS += $(CUDA_LDLIBS)
 
-BINFILES = nnet3-fvector-get-egs
+BINFILES = nnet3-fvector-get-egs nnet3-fvector-perturb-signal
 
 OBJFILES =
 

From c7ebfc4a0f296f947342b210e85d0d4ee4086a20 Mon Sep 17 00:00:00 2001
From: Hang Lyu <hlyu@a11.clsp.jhu.edu>
Date: Mon, 16 Jan 2017 08:55:16 -0500
Subject: [PATCH 21/23] allocate_examples

---
 egs/wsj/s5/steps/nnet3/fvector/add_noise.sh   |  16 +-
 .../steps/nnet3/fvector/allocate_examples.py  | 234 ++++++++++++++++++
 2 files changed, 248 insertions(+), 2 deletions(-)
 create mode 100755 egs/wsj/s5/steps/nnet3/fvector/allocate_examples.py

diff --git a/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh b/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
index 9d40d074a3e..406adda8c1d 100755
--- a/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
+++ b/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
@@ -113,7 +113,13 @@ if [ $stage -le 2 ]; then
     cat $dir/perturb_recording_map > $dir/perturb_map
     cat $dir/perturb_utt_map >> $dir/perturb_map
     #segments
-    steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/perturb_map <$data/segments >$dir/segments
+    steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/perturb_map <$data/segments | \
+    awk '{
+       uttid=$1; start_time=$3; end_time=$4;
+       split(uttid,S,"[_]");
+       recordingid=S[1];
+       print uttid " " recordingid " " start_time " " end_time
+    }' >$dir/segments
     #text
     steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/perturb_map <$data/text >$dir/text
     #utt2spk
@@ -123,7 +129,13 @@ if [ $stage -le 2 ]; then
   else #no segments->wav indexed by utterence-id/<recording-id> is equal to <utt-id>
     cp $dir/perturb_recording_map $dir/perturb_map
     #segments
-    steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/perturb_map <$data/segments >$dir/segments
+    steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/perturb_map <$data/segments | \
+    awk '{
+       uttid=$1; start_time=$3; end_time=$4;
+       split(uttid,S,"[_]");
+       recordingid=S[1];
+       print uttid " " recordingid " " start_time " " end_time
+    }' >$dir/segments
     #text
     steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/perturb_map <$data/text >$dir/text
     #utt2spk
diff --git a/egs/wsj/s5/steps/nnet3/fvector/allocate_examples.py b/egs/wsj/s5/steps/nnet3/fvector/allocate_examples.py
new file mode 100755
index 00000000000..aae6c53c3b1
--- /dev/null
+++ b/egs/wsj/s5/steps/nnet3/fvector/allocate_examples.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python
+
+# This script, for use when training fvectors, decides for you which examples
+# will come from which utterances, and at what point.
+
+# You call it as (e.g.)
+#
+#  allocate_examples.py --frames-per-chunk=200  --frames-per-iter=1000000 \
+#   --num-archives=169 --num-jobs=24  exp/xvector_a/egs/temp/utt2len.train <oriutt2allutt> exp/xvector_a/egs
+#
+# and this program outputs certain things to the temp directory (exp/xvector_a/egs/temp in this case)
+# that will enable you to dump the chunks for xvector training.  What we'll eventually be doing is invoking
+# the following program with something like the following args:
+#
+#  nnet3-fvector-get-egs [options] exp/xvector_a/temp/ranges.1  scp:data/train/feats.scp \
+#    ark:exp/xvector_a/egs/egs_temp.1.ark ark:exp/xvector_a/egs/egs_temp.2.ark \
+#    ark:exp/xvector_a/egs/egs_temp.3.ark
+#
+# where exp/xvector_a/temp/ranges.1 contains something like the following:
+#
+#   <utt{i}-p{j}> <utt{i}-p{k}> 0 1 50 200
+#
+# where each line is interpreted as follows:
+#  <source-utterance1> <source-utterance2> <relative-archive-index> <absolute-archive-index> <offset> <frame-length>
+#
+#  Note: <relative-archive-index> is the zero-based offset of the archive-index
+# within the subset of archives that a particular ranges file corresponds to;
+# and <absolute-archive-index> is the 1-based numeric index of the destination
+# archive among the entire list of archives, which will form part of the
+# archive's filename (e.g. egs/egs.<absolute-archive-index>.ark);
+# <absolute-archive-index> is only kept for debug purposes so you can see which
+# archive each line corresponds to.
+#
+# The list of archives corresponding to ranges.n will be written to outputs.n,
+# so in exp/xvector_a/temp/outputs.1 we'd have:
+#
+#  ark:exp/xvector_a/egs/egs_temp.1.ark ark:exp/xvector_a/egs/egs_temp.2.ark ark:exp/xvector_a/egs/egs_temp.3.ark
+#
+# The number of these files will equal 'num-jobs'.  If you add up the word-counts of
+# all the outputs.* files you'll get 'num-archives'.  The number of frames in each archive
+# will be about the --frames-per-iter.
+#
+
+# we're using python 3.x style print but want it to work in python 2.x,
+from __future__ import print_function
+import re, os, argparse, sys, math, warnings, random
+
+
+parser = argparse.ArgumentParser(description="Writes ranges.*, outputs.* and archive_chunk_lengths files "
+                                 "in preparation for dumping egs for xvector training.",
+                                 epilog="Called by steps/nnet3/xvector/get_egs.sh")
+parser.add_argument("--prefix", type=str, default="",
+                   help="Adds a prefix to the output files. This is used to distinguish between the train "
+                   "and diagnostic files.")
+parser.add_argument("--frames-per-chunk", type=int, default=100,
+                    help="The number of frames-per-chunk used for any archive")
+parser.add_argument("--frames-per-iter", type=int, default=1000000,
+                    help="Target number of frames for each archive")
+parser.add_argument("--num-archives", type=int, default=-1,
+                    help="Number of archives to write")
+parser.add_argument("--num-jobs", type=int, default=-1,
+                    help="Number of jobs we're going to use to write the archives; the ranges.* "
+                    "and outputs.* files are indexed by job.  Must be <= the --num-archives option.")
+parser.add_argument("--seed", type=int, default=1,
+                    help="Seed for random number generator")
+
+# now the positional arguments
+parser.add_argument("utt2len",
+                    help="utt2len file of the features to be used as input (format is: "
+                    "<utterance-id> <approx-num-frames>)")
+parser.add_argument("oriutt2allutt",
+                    help="oriutt2allutt to be used as input (format is: "
+                    "<ori-utt-id> <ori-utt-id> <p1-utt-id> ... <pn-utt-id>)")
+parser.add_argument("egs_dir",
+                    help="Name of egs directory, e.g. exp/xvector_a/egs")
+
+print(' '.join(sys.argv))
+
+args = parser.parse_args()
+
+if not os.path.exists(args.egs_dir + "/temp"):
+    os.makedirs(args.egs_dir + "/temp")
+
+## Check arguments.
+if args.frames_per_chunk <= 1:
+    sys.exit("--frames-per-chunk is invalid.")
+if args.frames_per_iter < 1000:
+    sys.exit("--frames-per-iter is invalid.")
+if args.num_archives < 1:
+    sys.exit("--num-archives is invalid")
+if args.num_jobs > args.num_archives:
+    sys.exit("--num-jobs is invalid (must not exceed num-archives)")
+
+random.seed(args.seed)
+
+f = open(args.utt2len, "r");
+if f is None:
+    sys.exit("Error opening utt2len file " + str(args.utt2len));
+utt_ids = []
+lengths = []
+for line in f:
+    a = line.split()
+    if len(a) != 2:
+        sys.exit("bad line in utt2len file " + line);
+    utt_ids.append(a[0])
+    lengths.append(int(a[1]))
+f.close()
+
+num_utts = len(utt_ids)
+max_length = max(lengths)
+
+if args.frames_per_chunk * 3 > max_length:  # the chunk length must be at most a third of the longest utterance
+    sys.exit("--frames-per-chunk={0} is not valid: it must be no more "
+             "than a third of the maximum length {1} from the utt2len file ".format(
+            args.frames_per_chunk, max_length))
+
+# Create the map from each ori-utt-id to the tuple of all its utt-ids. The keys are
+# the ori-utt-ids, which are expected to be the same ids as the elements of utt_ids[].
+f = open(args.oriutt2allutt, "r");
+if f is None:
+    sys.exit("Error opening oriutt2allutt file " + str(args.oriutt2allutt));
+utt_map = {}
+for line in f:
+    a = line.split()
+    if len(a) < 3:
+        sys.exit("bad line in oriutt2allutt file " + line);
+    tmp_list = []
+    for i in range(1, len(a)):
+        tmp_list.append(a[i])
+    tuple_list = tuple(tmp_list)
+    utt_map[a[0]]=tuple_list
+f.close()
+
+    
+# Returns a random index into utt_ids/lengths, restricted to utterances strictly
+# longer than min_length frames and drawn with probability proportional to length.
+def RandomUttAtLeastThisLong(min_length):
+    while True:
+        i = random.randrange(0, num_utts)
+        # Rejection sampling: read the next line as 'accept candidate i with
+        # probability lengths[i] / max_length', which makes the accepted
+        # utterances come out with probability proportional to their length.
+        if lengths[i] > min_length and random.random() < lengths[i] / float(max_length):
+            return i
+
+
+# Given an utterance length utt_length (in frames) and a desired chunk length
+# 'length' that is <= utt_length, this function randomly picks the start frame
+# of the chunk: an integer drawn uniformly from 0 .. utt_length - length
+# inclusive.  Exits with an error if length > utt_length.
+def GetRandomOffsets(utt_length, length):
+    if length > utt_length:
+        sys.exit("code error: tot-length > utt-length")
+    free_length = utt_length - length  # number of frames left over once the chunk is placed
+    offset = random.randrange(0, free_length + 1)  # randrange excludes the stop value, hence the + 1
+    return offset
+
+
+# Randomly chooses two entries at different positions of utt_map[ori_utt_id] and returns them as a pair.
+def ChoosePairs(ori_utt_id):
+    this_tuple = utt_map[ori_utt_id]
+    while True:  # redraw until the two positions differ; needs at least two entries in the tuple to terminate
+        first_index = random.randint(0, len(this_tuple) - 1)  # randint includes both end points
+        second_index = random.randint(0, len(this_tuple) - 1)
+        if first_index != second_index:
+            break
+    utt_a = this_tuple[first_index]
+    utt_b = this_tuple[second_index]
+    return (utt_a, utt_b)
+
+
+# each element of all_egs (one per archive) is
+# an array of 2-tuples (utterance-index, offset)
+all_egs= []
+
+prefix = ""
+if args.prefix != "":
+  prefix = args.prefix + "_"
+
+for archive_index in range(args.num_archives):
+    tot_length = 2 * args.frames_per_chunk  # frames counted per example (presumably one chunk from each utterance of the pair)
+    this_num_egs = (args.frames_per_iter // tot_length) + 1  # '//' keeps this an int under python 3 too, as range() requires
+    this_egs = [ ] # this will be an array of 2-tuples (utterance-index, start-frame).
+    for n in range(this_num_egs):
+        utt_index = RandomUttAtLeastThisLong(args.frames_per_chunk)
+        utt_len = lengths[utt_index]
+        offset = GetRandomOffsets(utt_len, args.frames_per_chunk)
+        this_egs.append( (utt_index, offset) )
+    all_egs.append(this_egs)
+
+# work out how many archives we assign to each job in an equitable way.
+num_archives_per_job = [ 0 ] * args.num_jobs
+for i in range(0, args.num_archives):
+    num_archives_per_job[i % args.num_jobs]  = num_archives_per_job[i % args.num_jobs] + 1
+
+
+cur_archive = 0  # zero-based index of the next archive that has not been given to a job yet
+for job in range(args.num_jobs):
+    this_ranges = []  # tuples (utterance-index, relative-archive-index, offset) for this job
+    this_archives_for_job = []  # zero-based absolute indexes of the archives written by this job
+    this_num_archives = num_archives_per_job[job]
+
+    for i in range(0, this_num_archives):
+        this_archives_for_job.append(cur_archive)
+        for (utterance_index, offset) in all_egs[cur_archive]:
+            this_ranges.append( (utterance_index, i, offset) )
+        cur_archive = cur_archive + 1
+    f = open(args.egs_dir + "/temp/" + prefix + "ranges." + str(job + 1), "w")
+    if f is None:
+        sys.exit("Error opening file " + args.egs_dir + "/temp/" + prefix + "ranges." + str(job + 1))
+    for (utterance_index, i, offset) in sorted(this_ranges):
+        archive_index = this_archives_for_job[i]
+        this_utt_id = utt_ids[utterance_index]
+        # Randomly select two of the utt-ids listed for this original utterance.
+        (utt_a, utt_b) = ChoosePairs(this_utt_id)
+        print("{0} {1} {2} {3} {4} {5}".format(utt_a,
+                                               utt_b,
+                                               i,
+                                               archive_index + 1,
+                                               offset,
+                                               args.frames_per_chunk),
+              file=f)
+    f.close()
+
+    f = open(args.egs_dir + "/temp/" + prefix + "outputs." + str(job + 1), "w")
+    if f is None:
+        sys.exit("Error opening file " + args.egs_dir + "/temp/" + prefix + "outputs." + str(job + 1))
+    print( " ".join([ str("{0}/" + prefix + "egs_temp.{1}.ark").format(args.egs_dir, n + 1) for n in this_archives_for_job ]),
+           file=f)
+    f.close()
+
+
+print("allocate_examples.py: finished generating " + prefix + "ranges.* and " + prefix + "outputs.* files")
+

From 43db796d5297ed478454eb6641eae91ca8f8961e Mon Sep 17 00:00:00 2001
From: Hang Lyu <hlyu@a11.clsp.jhu.edu>
Date: Tue, 17 Jan 2017 03:30:53 -0500
Subject: [PATCH 22/23] get egs C++ code

---
 src/fvectorbin/nnet3-fvector-get-egs.cc | 242 +++++++++++++++---------
 1 file changed, 149 insertions(+), 93 deletions(-)

diff --git a/src/fvectorbin/nnet3-fvector-get-egs.cc b/src/fvectorbin/nnet3-fvector-get-egs.cc
index 2f7fdbfa748..dd05e1efe56 100644
--- a/src/fvectorbin/nnet3-fvector-get-egs.cc
+++ b/src/fvectorbin/nnet3-fvector-get-egs.cc
@@ -1,6 +1,6 @@
 // fvectorbin/nnet3-fvector-get-egs.cc
 
-// Copyright 2016  Johns Hopkins University (author:  Daniel Povey)
+// Copyright 2012-2016  Johns Hopkins University (author:  Daniel Povey)
 
 // See ../../COPYING for clarification regarding multiple authors
 //
@@ -19,67 +19,71 @@
 
 #include <sstream>
 
-#include "base/kaldi-common.h"
 #include "util/common-utils.h"
-#include "hmm/transition-model.h"
-#include "hmm/posterior.h"
 #include "nnet3/nnet-example.h"
 
 namespace kaldi {
 namespace nnet3 {
 
-
-static void ProcessFile(const MatrixBase<BaseFloat> &feats,
-                        const std::string &utt_id,
-                        bool compress,
-                        int32 left_context,
-                        int32 right_context,
-                        int32 frames_per_eg,
-                        int64 *num_frames_written,
-                        int64 *num_egs_written,
-                        NnetExampleWriter *example_writer) {
-  for (int32 t = 0; t < feats.NumRows(); t += frames_per_eg) {
-
-    // actual_frames_per_eg is the number of frames in center.
-    // At the end of the file we pad with zero posteriors
-    // so that all examples have the same structure (prevents the need
-    // for recompilations).
-    int32 actual_frames_per_eg = std::min(frames_per_eg,
-                                          feats.NumRows() - t);
-
-    int32 tot_frames = left_context + frames_per_eg + right_context;
-
-    Matrix<BaseFloat> input_frames(tot_frames, feats.NumCols(), kUndefined);
-    
-    // Set up "input_frames".
-    for (int32 j = -left_context; j < frames_per_eg + right_context; j++) {
-      int32 t2 = j + t;
-      if (t2 < 0) t2 = 0;
-      if (t2 >= feats.NumRows()) t2 = feats.NumRows() - 1;
-      SubVector<BaseFloat> src(feats, t2),
-                           dest(input_frames, j + left_context);
-      dest.CopyFromVec(src);
+// Describes one example read from a line of the ranges file: the two source
+// utterances plus the start frame and length of the chunk to extract.
+struct FvectorChunkPairInfo {
+  std::string pair_name;    // "<utt_a>-<utt_b>-<start-frame>-<num-frames>", built in ProcessRangeFile()
+  std::string utt_a;        // first field of the ranges line
+  std::string utt_b;        // second field of the ranges line
+  int32 output_archive_id;  // third field: relative (zero-based) output archive index
+  int32 start_frame;        // fifth field of the ranges line
+  int32 num_frames;         // sixth field of the ranges line
+};
+
+// Process the range input file and append one heap-allocated
+// FvectorChunkPairInfo per line to *pairs (see Cleanup() for deletion).
+static void ProcessRangeFile(const std::string &range_rxfilename,
+                             std::vector<FvectorChunkPairInfo *> *pairs) {
+  Input range_input(range_rxfilename);
+  if (!range_rxfilename.empty()) {
+    std::string line;
+    while (std::getline(range_input.Stream(), line)) {
+      FvectorChunkPairInfo *pair = new FvectorChunkPairInfo();
+      std::vector<std::string> fields;
+      SplitStringToVector(line, " \t\n\r", true, &fields);
+      if (fields.size() != 6) {
+        KALDI_ERR << "Expected 6 fields in line of range file, got "
+                  << fields.size() << " instead.";
+      }
+
+      std::string utt_a = fields[0],
+                  utt_b = fields[1],
+                  start_frame_str = fields[4],
+                  num_frames_str = fields[5];
+
+      if (!ConvertStringToInteger(fields[2], &(pair->output_archive_id)) ||
+          !ConvertStringToInteger(start_frame_str, &(pair->start_frame)) ||
+          !ConvertStringToInteger(num_frames_str, &(pair->num_frames))) {
+        KALDI_ERR << "Expected integer for output archive in range file.";
+      }
+      pair->pair_name = utt_a + "-" + utt_b + "-" + start_frame_str + "-"
+                        + num_frames_str;
+      pair->utt_a = utt_a;
+      pair->utt_b = utt_b;
+
+      pairs->push_back(pair);
     }
-
-    NnetExample eg;
-    
-    // call the regular input "input".
-    eg.io.push_back(NnetIo("input", -left_context, input_frames));
-   
-    if (compress) { eg.Compress();}
-      
-    std::ostringstream os;
-    os << utt_id << "-" << t;
-
-    std::string key = os.str(); // key is <utt_id>-<frame_id>
-
-    *num_frames_written += actual_frames_per_eg;
-    *num_egs_written += 1;
-
-    example_writer->Write(key, eg);
   }
 }
 
+// Deletes each NnetExampleWriter in *writers, then each FvectorChunkPairInfo in *pairs; the vectors are not cleared.
+static void Cleanup(std::vector<FvectorChunkPairInfo *> *pairs,
+                    std::vector<NnetExampleWriter *> *writers) {
+  for (std::vector<NnetExampleWriter *>::iterator
+      it = writers->begin(); it != writers->end(); ++it) {
+    delete *it;  // the writers are deleted first, before the pair records
+  }
+  for (std::vector<FvectorChunkPairInfo *>::iterator it = pairs->begin();
+       it != pairs->end(); ++it) {
+    delete *it;  // these are the objects allocated with new in ProcessRangeFile()
+  }
+}
 
 } // namespace nnet3
 } // namespace kaldi
@@ -89,65 +93,117 @@ int main(int argc, char *argv[]) {
     using namespace kaldi;
     using namespace kaldi::nnet3;
     typedef kaldi::int32 int32;
-    typedef kaldi::int64 int64;
 
     const char *usage =
-        "Get frame-by-frame examples of data for nnet3 neural network training.\n"
-        "Essentially this is a format change from features into a special frame-by-frame format.\n"
-        "This program handles the common case where you have some input features\n"
-        "and convert them to fvector examples format\n"
-        "Note: In fvector version, there is no need for iVectors, posterior and labels.\n"
+        "Get examples for training an nnet3 neural network for the fvector\n"
+        "system.  Each output example contains a pair of feature chunks from\n"
+        "the specified utterance.  The location and length of the feature chunks\n"
+        "are specified in the 'ranges' file.  Each line is interpreted as\n"
+        "follows:\n"
+        "<source-utterance1> <source-utterance2> <relative-output-archive-index> "
+        "<absolute-archive-index>  <start-frame> <num-frames1> "
+        "<start-frame-index2> <num-frames2>\n"
+        "where <relative-output-archive-index> is interpreted as a zero-based\n"
+        "index into the wspecifiers specified on the command line (<egs-0-out>\n"
+        "and so on), and <absolute-archive-index> is ignored by this program.\n"
+        "For example:\n"
+        "  utt1-p1 utt1-p2 3  13  5   65\n"
+        "  utt2    utt2-pn 0  10  160 50\n"
         "\n"
-        "Usage:  nnet3-fvector-get-egs [options] <features-rspecifier> <egs-out>\n"
+        "Usage:  nnet3-fvector-get-egs [options] <ranges-filename> "
+        "<features-rspecifier> <egs-0-out> <egs-1-out> ... <egs-N-1-out>\n"
         "\n"
-        "An example [where $feats expands to the actual features]:\n"
-        "nnet3-fvector-get-egs --left-context=12 --right-context=9 --compress=true \"$feats\" \\\n"
-        "\"ark:train.egs\"\n";
-        
+        "For example:\n"
+        "nnet3-fvector-get-egs ranges.1 \"$feats\" ark:egs_temp.1.ark"
+        "  ark:egs_temp.2.ark ark:egs_temp.3.ark\n";
 
     bool compress = true;
-    int32 left_context = 0, right_context = 0, num_frames = 1;
-    
+
     ParseOptions po(usage);
     po.Register("compress", &compress, "If true, write egs in "
                 "compressed format.");
-    po.Register("left-context", &left_context, "Number of frames of left "
-                "context the neural net requires.");
-    po.Register("right-context", &right_context, "Number of frames of right "
-                "context the neural net requires.");
-    po.Register("num-frames", &num_frames, "Number of frames is central");
-    
+
     po.Read(argc, argv);
 
-    if (po.NumArgs() != 2) {
+    if (po.NumArgs() < 3) {
       po.PrintUsage();
       exit(1);
     }
 
-    std::string feature_rspecifier = po.GetArg(1),
-                examples_wspecifier = po.GetArg(2);
-
-    // Read in all the training files.
-    SequentialBaseFloatMatrixReader feat_reader(feature_rspecifier);
-    NnetExampleWriter example_writer(examples_wspecifier);
-    
-    int32 num_done = 0;
-    int64 num_frames_written = 0, num_egs_written = 0;
-    
-    for (; !feat_reader.Done(); feat_reader.Next()) {
-      std::string key = feat_reader.Key();
-      const Matrix<BaseFloat> &feats = feat_reader.Value();
-      ProcessFile(feats, key, compress, left_context, right_context,
-                  num_frames, &num_frames_written, &num_egs_written,
-                  &example_writer);
-      num_done++;
+    std::string range_rspecifier = po.GetArg(1);
+    std::string feature_rspecifier = po.GetArg(2);
+    std::vector<NnetExampleWriter *> example_writers;
+
+    for (int32 i = 3; i <= po.NumArgs(); i++) {
+      example_writers.push_back(new NnetExampleWriter(po.GetArg(i)));
+    }
+
+    std::vector<FvectorChunkPairInfo *> pairs;
+    // Parse the ranges file and initialize the vector of chunk pairs.
+    ProcessRangeFile(range_rspecifier, &pairs);
+
+    RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier);
+
+    int32 num_error = 0,
+          num_egs_written = 0;
+
+    for (std::vector<FvectorChunkPairInfo *>::iterator iter = pairs.begin();
+         iter != pairs.end(); iter++) {
+
+      FvectorChunkPairInfo *pair = *iter;
+      // get the features
+      if (!feature_reader.HasKey(pair->utt_a) || !feature_reader.HasKey(pair->utt_b)) {
+        num_error++;
+        KALDI_WARN << "The feature " << pair->utt_a << " or " << pair->utt_b
+                   << " is not found.";
+        continue;
+      }
+      const Matrix<BaseFloat> &feats_a = feature_reader.Value(pair->utt_a);
+      const Matrix<BaseFloat> &feats_b = feature_reader.Value(pair->utt_b);
+      int32 num_rows = feats_a.NumRows(),
+            feat_dim = feats_a.NumCols();
+      if (num_rows < (pair->start_frame + pair->num_frames)) {
+        num_error++;
+        KALDI_WARN << "Unable to create examples for utterance " << pair->pair_name
+                   << ". Requested chunk boundary is the "
+                   << (pair->start_frame + pair->num_frames)
+                   << "th frmae, but utterance has only " << num_rows << " frames.";
+        continue;
+      } else {
+        SubMatrix<BaseFloat> chunk1(feats_a, pair->start_frame,
+                                    pair->num_frames, 0, feat_dim),
+                             chunk2(feats_b, pair->start_frame,
+                                    pair->num_frames, 0, feat_dim);
+        NnetIo nnet_io1 = NnetIo("input", 0, chunk1),
+               nnet_io2 = NnetIo("input", 0, chunk2);
+        for (std::vector<Index>::iterator indx_it = nnet_io1.indexes.begin();
+            indx_it != nnet_io1.indexes.end(); ++indx_it) {
+          indx_it->n = 0;
+        }
+        for (std::vector<Index>::iterator indx_it = nnet_io2.indexes.begin();
+            indx_it != nnet_io2.indexes.end(); ++indx_it) {
+          indx_it->n = 1;
+        }
+        NnetExample eg;
+        eg.io.push_back(nnet_io1);
+        eg.io.push_back(nnet_io2);
+        if (compress)
+          eg.Compress();
+
+        if (pair->output_archive_id >= example_writers.size()) {
+          KALDI_ERR << "Requested output index exceeds number of specified "
+                    << "output files.";
+        }
+        example_writers[pair->output_archive_id]->Write(pair->pair_name, eg);
+        num_egs_written += 1;
+      }
     }
+    Cleanup(&pairs, &example_writers);
 
     KALDI_LOG << "Finished generating examples, "
-              << "successfully processed " << num_done
-              << " feature files, wrote " << num_egs_written << " examples, "
-              << " with " << num_frames_written << " egs in total.";
-    return (num_egs_written == 0 || num_done == 0 ? 1 : 0);
+              << "successfully wrote " << num_egs_written << " examples; "
+              << num_error << " files had errors.";
+    return (num_egs_written == 0);
   } catch(const std::exception &e) {
     std::cerr << e.what() << '\n';
     return -1;

From 752cf66fd15c1f1d06013d0985be45dd4cee942a Mon Sep 17 00:00:00 2001
From: Hang Lyu <hlyu@b01.clsp.jhu.edu>
Date: Mon, 30 Jan 2017 03:45:24 -0500
Subject: [PATCH 23/23] now it can generate the egs

---
 egs/wsj/s5/steps/nnet3/fvector/add_noise.sh   | 156 ++++++++++++------
 .../steps/nnet3/fvector/allocate_examples.py  |  18 +-
 .../nnet3/fvector/generate_noise_range.py     |   8 +-
 .../fvector/generate_perturb_wav_specifier.py |   2 +-
 src/fvectorbin/Makefile                       |   2 +-
 src/fvectorbin/nnet3-fvector-get-egs.cc       |  21 ++-
 .../nnet3-fvector-perturb-signal.cc           |  49 ++++--
 tools/config/common_path.sh                   |   1 +
 8 files changed, 169 insertions(+), 88 deletions(-)

diff --git a/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh b/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
index 406adda8c1d..205c775e730 100755
--- a/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
+++ b/egs/wsj/s5/steps/nnet3/fvector/add_noise.sh
@@ -11,7 +11,7 @@
 # Begin Configuration section.
 stage=0
 cmd=run.pl
-nj=4
+nj=8
 # Begain Configuration.
 min_additive_noise_len=2.0       # the minimum duration of each noise file in seconds.
 num_ranges_per_wav=4             # the number of noise ranges for each wav.
@@ -20,6 +20,13 @@ max_snr=-15                      # the maximum snr value in dB.
 seed=-1                          # set the random seed.
 variable_len_additive_noise=true #If true, generate the variable-length range files.
                                  #If false, generate the fixed-length range files.
+# Begin Configuration of section 6
+# for the details, please see steps/nnet3/fvector/get_egs.sh
+frames_per_chunk=200
+frames_per_iter=1000000
+frames_per_iter_diagnostic=1000000
+num_diagnostic_archives=3
+num_heldout_utts=500
 # End Configuration options.
 
 echo "$0 $@" # Print the command line for logging
@@ -27,59 +34,63 @@ echo "$0 $@" # Print the command line for logging
 [ -f path.sh ] && . ./path.sh # source the path.
 . parse_options.sh || exit 1;
 
-if [ $# != 3 ]; then
-  echo "usage: steps/nnet3/fvector/add_noise.sh <data-dir> <noise-dir> <range-dir>"
-  echo "e.g.:  steps/nnet3/fvector/add_noise.sh data/train data/noise ranges"
+if [ $# != 4 ]; then
+  echo "usage: steps/nnet3/fvector/add_noise.sh <data-dir> <noise-dir> <range-dir> <exp-dir>"
+  echo "e.g.:  steps/nnet3/fvector/add_noise.sh data/train data/noise data/perturbed exp/fvector_a/egs"
   echo "main options (for others, see top of script file)"
   echo "  --min-additive-noise-len <second>                # limit the minimum length of noise" 
   echo "  --num-ranges-per-wav <n>                         # number of noise range kinds"
   echo "  --variable-len-additive-noise (true|false)       # decide fixed/variable version"
   echo "  --nj <nj>                                        # number of parallel jobs"
   echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs"
+  exit 1
 fi
 
 data=$1  # contain wav.scp
 noise=$2 # contain noise.scp 
 dir=$3   # eg: data/perturbed
+exp=$4   # the directory used to store the egs
 
-# remove the segments so that the duration corresponding to recording-id
-if [ -f $data/segments ]; then
-  mv $data/segments $data/segments_backup
-  if [ -f $data/utt2dur ]; then
-    mv $data/utt2dur $data/utt2dur.backup
-    utils/data/get_utt2dur.sh $data
+if [ $stage -le 0 ];then
+  echo "The 1st stage: generating the duration file for each recording"
+  # remove the segments so that the duration corresponding to recording-id
+  if [ -f $data/segments ]; then
+    mv $data/segments $data/segments_backup
+    if [ -f $data/utt2dur ]; then
+      mv $data/utt2dur $data/utt2dur.backup
+      utils/data/get_utt2dur.sh $data
+    else
+      utils/data/get_utt2dur.sh $data
+    fi
+    mv $data/segments_backup $data/segments
   else
-    utils/data/get_utt2dur.sh $data
+    if [ ! -f $data/utt2dur ]; then
+      # get original clean wav's duration
+      utils/data/get_utt2dur.sh $data
+    fi 
   fi
-  mv $data/segments_backup $data/segments
-else
-  if [ ! -f $data/utt2dur ]; then
-    # get original clean wav's duration
-    utils/data/get_utt2dur.sh $data
-  fi 
-fi
 
-# remove the segments so that the duration corresponding to recording-id
-if [ -f $noise/segments ]; then
-  mv $noise/segments $noise/segments_backup
-  if [ -f $noise/utt2dur ]; then
-    mv $noise/utt2dur $noise/utt2dur.backup
-    utils/data/get_utt2dur.sh $noise
+  # remove the segments so that the duration corresponding to recording-id
+  if [ -f $noise/segments ]; then
+    mv $noise/segments $noise/segments_backup
+    if [ -f $noise/utt2dur ]; then
+      mv $noise/utt2dur $noise/utt2dur.backup
+      utils/data/get_utt2dur.sh $noise
+    else
+      utils/data/get_utt2dur.sh $noise
+    fi
+    mv $noise/segments_backup $noise/segments
   else
-    utils/data/get_utt2dur.sh $noise
+    if [ ! -f $noise/utt2dur ]; then
+      # get original clean wav's duration
+      utils/data/get_utt2dur.sh $noise
+    fi 
   fi
-  mv $noise/segments_backup $noise/segments
-else
-  if [ ! -f $noise/utt2dur ]; then
-    # get original clean wav's duration
-    utils/data/get_utt2dur.sh $noise
-  fi 
 fi
 
-
 mkdir -p $dir/log
-if [ $stage -le 0 ]; then
-  echo "$0: generate $num_kind_rage kinds of noise range for each original wav"
+if [ $stage -le 1 ]; then
+    echo "The 2nd stage: generate $num_kind_rage kinds of noise range for each original wav"
   $cmd $dir/log/generate_noise_range.log \
     steps/nnet3/fvector/generate_noise_range.py \
       --num-ranges-per-wav=$num_ranges_per_wav \
@@ -91,60 +102,85 @@ if [ $stage -le 0 ]; then
       $data/utt2dur $noise/utt2dur $dir/ranges $dir/wav2perturbedwav
 fi
 
-if [ $stage -le 1 ]; then
-  echo "$0: generate perturbed_wav_specifier"
+if [ $stage -le 2 ]; then
+  echo "The 3rd stage: generate perturbed_wav_specifier"
   $cmd $dir/log/generate_perturb_wav_specifier.log \
     steps/nnet3/fvector/generate_perturb_wav_specifier.py \
       --noise=$noise/wav.scp \
-      $data/wav.scp $dir/ranges $dir/wav2perturbedwav $dir/wav.scp
+      $data/wav.scp $dir/ranges $dir/wav2perturbedwav $dir/tmp.wav.scp
+  cat $dir/tmp.wav.scp | sort > $dir/wav.scp
+  rm -f $dir/tmp.wav.scp
 fi
 
-if [ $stage -le 2 ]; then
-  echo "$0: generate other files in data directory"
+if [ $stage -le 3 ]; then
+  echo "The 4th stage: generate other files in data directory"
   #reco2file_and_channel
-  cat $dir/wav2perturbedwav | cut -d ' ' -f 1 | paste -d ' ' - $dir/wav2perturbedwav > $dir/perturb_recording_map
-  steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/perturb_recording_map <$data/reco2file_and_channel >$dir/reco2file_and_channel
+  cat $dir/wav2perturbedwav | cut -d ' ' -f 1 | paste -d ' ' - $dir/wav2perturbedwav > $dir/.perturb_recording_map
+  steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/.perturb_recording_map <$data/reco2file_and_channel >$dir/reco2file_and_channel
   if [ -f $data/segments ]; then
     awk -v num=$num_ranges_per_wav '{
       printf("%s %s",$1, $1);
-      for(i=1; i<= num; i++){ printf(" %s%s-%s","perturb", i, $1); }
+      for(i=1; i<= num; i++){ printf(" %s-%s%s", $1, "perturbed", i); }
       printf("\n");
-    }' <$data/segments > $dir/perturb_utt_map
-    cat $dir/perturb_recording_map > $dir/perturb_map
-    cat $dir/perturb_utt_map >> $dir/perturb_map
+    }' <$data/segments > $dir/.perturb_utt_map
+    cat $dir/.perturb_recording_map > $dir/.perturb_map
+    cat $dir/.perturb_utt_map >> $dir/.perturb_map
+    cp $dir/.perturb_utt_map $dir/uniq2utt
     #segments
-    steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/perturb_map <$data/segments | \
+    steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/.perturb_map <$data/segments | \
     awk '{
        uttid=$1; start_time=$3; end_time=$4;
        split(uttid,S,"[_]");
-       recordingid=S[1];
+       if ( S[2] ~/.*-perturb.*$/ ) {
+         split(S[2],S1,"[-]");
+         recordingid=(S[1]"-"S1[3]);
+       } else {
+         recordingid=S[1]; 
+       }
        print uttid " " recordingid " " start_time " " end_time
     }' >$dir/segments
     #text
-    steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/perturb_map <$data/text >$dir/text
+    steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/.perturb_map <$data/text | sort >$dir/text
     #utt2spk
-    steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/perturb_map <$data/utt2spk >$dir/utt2spk
+    steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/.perturb_map <$data/utt2spk | sort >$dir/utt2spk
     #spk2utt
     utt2spk_to_spk2utt.pl <$dir/utt2spk | sort > $dir/spk2utt
   else #no segments->wav indexed by utterence-id/<recording-id> is equal to <utt-id>
-    cp $dir/perturb_recording_map $dir/perturb_map
+    cp $dir/.perturb_recording_map $dir/.perturb_map
+    cp $dir/.perturb_map $dir/uniq2utt
     #segments
-    steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/perturb_map <$data/segments | \
+    steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/.perturb_map <$data/segments | \
     awk '{
        uttid=$1; start_time=$3; end_time=$4;
        split(uttid,S,"[_]");
-       recordingid=S[1];
+       if ( S[2] ~/.*-perturb.*$/ ) {
+         split(S[2],S1,"[-]");
+         recordingid=(S[1]"-"S1[3]);
+       } else {
+         recordingid=S[1]; 
+       }
        print uttid " " recordingid " " start_time " " end_time
-    }' >$dir/segments
+    }' | sort >$dir/segments
     #text
-    steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/perturb_map <$data/text >$dir/text
+    steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/.perturb_map <$data/text | sort >$dir/text
     #utt2spk
-    steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/perturb_map <$data/utt2spk >$dir/utt2spk
+    steps/nnet3/fvector/apply_map_one2mult.pl -f 1 $dir/.perturb_map <$data/utt2spk | sort >$dir/utt2spk
     #spk2utt
     utt2spk_to_spk2utt.pl <$dir/utt2spk | sort > $dir/spk2utt
   fi
 fi
+#Now, we have already finished generating the perturbed data directory.
 
+if [ $stage -le 4 ]; then
+  echo "The 5th stage: make features."
+  mfccdir=mfcc
+  dir_basename=$(basename $dir)
+  steps/make_mfcc.sh --nj 50 --cmd "$cmd" \
+    $dir exp/make_mfcc/$dir_basename $mfccdir
+  steps/compute_cmvn_stats.sh $dir exp/make_mfcc/$dir_basename $mfccdir
+fi
+
+#restore
 if [ -f $data/utt2dur.backup ]; then
   mv $data/utt2dur.backup $data/utt2dur
 fi
@@ -152,4 +188,14 @@ if [ -f $noise/utt2dur.backup ]; then
   mv $noise/utt2dur.backup $noise/utt2dur
 fi
 
+if [ $stage -le 5 ]; then
+  echo "The 6th stage: generate egs."
+  steps/nnet3/fvector/get_egs.sh \
+    --frames-per-chunk $frames_per_chunk \
+    --frames-per-iter $frames_per_iter \
+    --frames-per-iter-diagnostic $frames_per_iter_diagnostic \
+    --num-diagnostic-archives $num_diagnostic_archives \
+    --num-heldout-utts $num_heldout_utts \
+    $dir $exp
+fi
 exit 0
diff --git a/egs/wsj/s5/steps/nnet3/fvector/allocate_examples.py b/egs/wsj/s5/steps/nnet3/fvector/allocate_examples.py
index aae6c53c3b1..219356e7388 100755
--- a/egs/wsj/s5/steps/nnet3/fvector/allocate_examples.py
+++ b/egs/wsj/s5/steps/nnet3/fvector/allocate_examples.py
@@ -6,17 +6,17 @@
 # You call it as (e.g.)
 #
 #  allocate_examples.py --frames-per-chunk=200  --frames-per-iter=1000000 \
-#   --num-archives=169 --num-jobs=24  exp/xvector_a/egs/temp/utt2len.train exp/xvector_a/egs
+#   --num-archives=169 --num-jobs=24  exp/fvector_a/egs/temp/utt2len.train exp/fvector_a/egs
 #
 # and this program outputs certain things to the temp directory (exp/xvector_a/egs/temp in this case)
 # that will enable you to dump the chunks for xvector training.  What we'll eventually be doing is invoking
 # the following program with something like the following args:
 #
-#  nnet3-fvector-get-egs [options] exp/xvector_a/temp/ranges.1  scp:data/train/feats.scp \
-#    ark:exp/xvector_a/egs/egs_temp.1.ark ark:exp/xvector_a/egs/egs_temp.2.ark \
-#    ark:exp/xvector_a/egs/egs_temp.3.ark
+#  nnet3-fvector-get-egs [options] exp/fvector_a/temp/ranges.1  scp:data/train/feats.scp \
+#    ark:exp/fvector_a/egs/egs_temp.1.ark ark:exp/fvector_a/egs/egs_temp.2.ark \
+#    ark:exp/fvector_a/egs/egs_temp.3.ark
 #
-# where exp/xvector_a/temp/ranges.1 contains something like the following:
+# where exp/fvector_a/temp/ranges.1 contains something like the following:
 #
 #   <utt{i}-p{j}> <utt{i}-p{k}> 0 1 50 200
 #
@@ -32,9 +32,9 @@
 # archive each line corresponds to.
 #
 # The list of archives corresponding to ranges.n will be written to output.n, 
-# so in exp/xvector_a/temp/outputs.1 we'd have:
+# so in exp/fvector_a/temp/outputs.1 we'd have:
 #
-#  ark:exp/xvector_a/egs/egs_temp.1.ark ark:exp/xvector_a/egs/egs_temp.2.ark ark:exp/xvector_a/egs/egs_temp.3.ark
+#  ark:exp/fvector_a/egs/egs_temp.1.ark ark:exp/fvector_a/egs/egs_temp.2.ark ark:exp/fvector_a/egs/egs_temp.3.ark
 #
 # The number of these files will equal 'num-jobs'.  If you add up the word-counts of
 # all the outputs.* files you'll get 'num-archives'.  The number of frames in each archive
@@ -217,8 +217,8 @@ def ChoosePairs(ori_utt_id):
                                            utt_b,
                                            i,
                                            archive_index + 1,
-                                           offset
-                                           args.frames_per_chunk,
+                                           offset,
+                                           args.frames_per_chunk),
               file=f)
     f.close()
 
diff --git a/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py b/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py
index ed147b27d40..e29359b8e9b 100755
--- a/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py
+++ b/egs/wsj/s5/steps/nnet3/fvector/generate_noise_range.py
@@ -144,10 +144,10 @@ def GenerateFixedLengthRangeFile():
         # We generate $num_ranges_per_wav ranges
         for j in range(0, args.num_ranges_per_wav):
             # print the perturbed wav id in the beginning of line
-            print("{1}-{0}".format(wav_ids[i], "perturbed"+str(j+1)), end=" ", file=f)
+            print("{0}-{1}".format(wav_ids[i], "perturbed"+str(j+1)), end=" ", file=f)
 
             # print the perturbedwav_id
-            print(" {1}-{0}".format(wav_ids[i], "perturbed"+str(j+1)), end="", file=g)
+            print(" {0}-{1}".format(wav_ids[i], "perturbed"+str(j+1)), end="", file=g)
 
             # select a number from [1 ... max_num_additive_noise]
             num_additive_noise = random.randint(1, max_num_additive_noise)
@@ -246,10 +246,10 @@ def GenerateVariableLengthRangeFile():
         # We generate $num_ranges_per_wav ranges
         for j in range(0, args.num_ranges_per_wav):
             # print the perturbed wav id in the beginning of line
-            print("{1}-{0}".format(wav_ids[i], "perturbed"+str(j+1)), end=" ", file=f)
+            print("{0}-{1}".format(wav_ids[i], "perturbed"+str(j+1)), end=" ", file=f)
             
             # print the perturbedwav_id
-            print(" {1}-{0}".format(wav_ids[i], "perturbed"+str(j+1)), end="", file=g)
+            print(" {0}-{1}".format(wav_ids[i], "perturbed"+str(j+1)), end="", file=g)
 
             # generate range file
             # format: wav_t_start:wav_t_end:noise_name:noise_t_start:noise_t_end:snr,
diff --git a/egs/wsj/s5/steps/nnet3/fvector/generate_perturb_wav_specifier.py b/egs/wsj/s5/steps/nnet3/fvector/generate_perturb_wav_specifier.py
index 58892782f05..fc49a6bc4df 100755
--- a/egs/wsj/s5/steps/nnet3/fvector/generate_perturb_wav_specifier.py
+++ b/egs/wsj/s5/steps/nnet3/fvector/generate_perturb_wav_specifier.py
@@ -88,7 +88,7 @@
     for i in range(1, len(wav_list)):
         current_perturbed_wav_id = wav_list[i]
         current_perturbed_wav_index = perturbed_range_ids.index(current_perturbed_wav_id)
-        print('''{0} {1} nnet3-fvector-perturb-signal --noise-scp=scp:{2} --noise=\"{3}\" - |'''.format(
+        print('''{0} {1} nnet3-fvector-perturb-signal --noise=scp:{2} --noise-range=\"{3}\" - - |'''.format(
             current_perturbed_wav_id,
             wav_extended_files[current_wav_index],
             args.noise,
diff --git a/src/fvectorbin/Makefile b/src/fvectorbin/Makefile
index 73c81a4bbb6..48709027de1 100644
--- a/src/fvectorbin/Makefile
+++ b/src/fvectorbin/Makefile
@@ -6,7 +6,7 @@ include ../kaldi.mk
 LDFLAGS += $(CUDA_LDFLAGS)
 LDLIBS += $(CUDA_LDLIBS)
 
-BINFILES = nnet3-fvector-get-egs nnet3-fvector-perturb-signal
+BINFILES = nnet3-fvector-get-egs nnet3-fvector-perturb-signal nnet3-fvector-get-egs-simple
 
 OBJFILES =
 
diff --git a/src/fvectorbin/nnet3-fvector-get-egs.cc b/src/fvectorbin/nnet3-fvector-get-egs.cc
index dd05e1efe56..4e3179eb2d5 100644
--- a/src/fvectorbin/nnet3-fvector-get-egs.cc
+++ b/src/fvectorbin/nnet3-fvector-get-egs.cc
@@ -158,21 +158,26 @@ int main(int argc, char *argv[]) {
                    << " is not found.";
         continue;
       }
-      const Matrix<BaseFloat> &feats_a = feature_reader.Value(pair->utt_a);
-      const Matrix<BaseFloat> &feats_b = feature_reader.Value(pair->utt_b);
+      const Matrix<BaseFloat> feats_a = feature_reader.Value(pair->utt_a);
+      const Matrix<BaseFloat> feats_b = feature_reader.Value(pair->utt_b);
       int32 num_rows = feats_a.NumRows(),
             feat_dim = feats_a.NumCols();
-      if (num_rows < (pair->start_frame + pair->num_frames)) {
+      if (num_rows < pair->num_frames) {
         num_error++;
         KALDI_WARN << "Unable to create examples for utterance " << pair->pair_name
-                   << ". Requested chunk boundary is the "
-                   << (pair->start_frame + pair->num_frames)
-                   << "th frmae, but utterance has only " << num_rows << " frames.";
+                   << ". Requested chunk size is "
+                   << pair->num_frames
+                   << ", but utterance has only " << num_rows << " frames.";
         continue;
       } else {
-        SubMatrix<BaseFloat> chunk1(feats_a, pair->start_frame,
+        // The utt2len file does not give the exact number of frames of an
+        // utterance, so the requested chunk positions are approximate and may
+        // slightly exceed the number of frames in the utterance.
+        // If that occurs, we shift the chunks' location back slightly.
+        int32 shift = std::min(0, num_rows - pair->start_frame - pair->num_frames);
+        SubMatrix<BaseFloat> chunk1(feats_a, pair->start_frame + shift,
                                     pair->num_frames, 0, feat_dim),
-                             chunk2(feats_b, pair->start_frame,
+                             chunk2(feats_b, pair->start_frame + shift,
                                     pair->num_frames, 0, feat_dim);
         NnetIo nnet_io1 = NnetIo("input", 0, chunk1),
                nnet_io2 = NnetIo("input", 0, chunk2);
diff --git a/src/fvectorbin/nnet3-fvector-perturb-signal.cc b/src/fvectorbin/nnet3-fvector-perturb-signal.cc
index 52992c173c0..a4459ce6d50 100644
--- a/src/fvectorbin/nnet3-fvector-perturb-signal.cc
+++ b/src/fvectorbin/nnet3-fvector-perturb-signal.cc
@@ -61,10 +61,10 @@ void GenerateController(std::vector<std::string> &segments,
 }
 
 void ApplyNoise(std::string &noise_scp, const std::vector<AdditiveNoiseRange> &controller,
-                const VectorBase<BaseFloat> &input_wav, VectorBase<BaseFloat> *perturbed_wav) {
+                const VectorBase<BaseFloat> &input_wav, const int &samp_freq_input,
+                VectorBase<BaseFloat> *perturbed_wav) {
   // about noise list
   RandomAccessTableReader<WaveHolder> noise_reader(noise_scp);
-  int samp_freq_input = input_wav.Dim();
 
   // add noise
 
@@ -83,7 +83,35 @@ void ApplyNoise(std::string &noise_scp, const std::vector<AdditiveNoiseRange> &c
     int32 noise_start_point = samp_freq_noise * controller[i].noise_t_start;
     int32 noise_end_point = samp_freq_noise * controller[i].noise_t_end - 1;
     BaseFloat snr = controller[i].snr;
+    // This part deals with the floating-point precision problem.
+    // E.g. if wav_t_start = 259.49 and the sample frequency is 8000, then in
+    // theory wav_start_point is 2075920; in practice it comes out as 2075919.
+    int32 input_length = input_end_point - input_start_point + 1;
+    int32 noise_length = noise_end_point - noise_start_point + 1;
+    if (input_length != noise_length) {
+      int32 delta = (input_length > noise_length?(input_length - noise_length)
+                                                :(noise_length-input_length));
+      if (delta < 0.01*samp_freq_input) {
+        if (input_length > noise_length) {
+          input_end_point = input_end_point - delta;
+        } else {
+          noise_end_point = noise_end_point - delta;
+        }
+      } else {
+        KALDI_ERR << "There is a problem about input length does not match noise length"
+                  << " where the noise-id is: " << controller[i].noise_uttid
+                  << ", the input length is: " << input_length
+                  << ", the noise length is: " << noise_length << std::endl; 
+      }
+    }
 
+    // The end sample index must be less than the total number of samples.
+    if ((input_end_point > input_wav.Dim()-1) || (noise_end_point > noise.Dim()-1)) {
+      int32 over_boundary = ((input_end_point - input_wav.Dim() + 1) > (noise_end_point - noise.Dim() + 1) ?
+                             (input_end_point - input_wav.Dim() + 1) : (noise_end_point - noise.Dim() + 1));
+      input_end_point = input_end_point - over_boundary;
+      noise_end_point = noise_end_point - over_boundary;
+    }
     // The input vector and noise vector contain the whole content of utt seperately.
     // According to the AdditiveNoiseRange, we stepwise add the additive noise to input.
     // To save the space, we use Subvector, because it returns the pointer.
@@ -159,18 +187,19 @@ int main(int argc, char *argv[]) {
     // Generate the Noise Controller list
     std::vector<AdditiveNoiseRange> controller;
     if (!noise_range.empty()) {
-      int index = noise_range.find_first_of(" ");
-      std::string perturbed_utt_id = noise_range.substr(0, index);
-      std::string noise_range_content = noise_range.substr(index+1);
+      //int index = noise_range.find_first_of(" ");
+      //std::string perturbed_utt_id = noise_range.substr(0, index);
+      //std::string noise_range_content = noise_range.substr(index+1);
       std::vector<std::string> segments;
-      SplitStringToVector(noise_range_content, ",", true, &segments);
+      SplitStringToVector(noise_range, ",", true, &segments);
       GenerateController(segments, &controller);
     }
 
+    bool binary = true;
     WaveData input_wave;
     {
       WaveHolder waveholder;
-      Input ki(input_wave_file);
+      Input ki(input_wave_file, &binary);
       waveholder.Read(ki.Stream());
       input_wave = waveholder.Value();
     }
@@ -189,14 +218,14 @@ int main(int argc, char *argv[]) {
 
     // new output vector and add noise
     Vector<BaseFloat> output(input);
-    ApplyNoise(noise, controller, input, &output);
+    ApplyNoise(noise, controller, input, samp_freq_input, &output);
 
     Matrix<BaseFloat> out_matrix(1, num_samp_input);
     out_matrix.CopyRowsFromVec(output);
 
     WaveData out_wave(samp_freq_input, out_matrix);
-    Output ko(output_wave_file, false);
-    out_wave.Write(ko.Stream());
+    Output ko(output_wave_file, binary, false);
+    WaveHolder::Write(ko.Stream(), true, out_wave);
 
     return 0;
   } catch(const std::exception &e) {
diff --git a/tools/config/common_path.sh b/tools/config/common_path.sh
index 5534cf2d13b..f95c5acf8e6 100644
--- a/tools/config/common_path.sh
+++ b/tools/config/common_path.sh
@@ -21,4 +21,5 @@ ${KALDI_ROOT}/src/onlinebin:\
 ${KALDI_ROOT}/src/sgmm2bin:\
 ${KALDI_ROOT}/src/sgmmbin:\
 ${KALDI_ROOT}/src/xvectorbin:\
+${KALDI_ROOT}/src/fvectorbin:\
 $PATH